From 008affa64559487c6886e27075bb5377ca915b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Wed, 16 Jul 2025 04:41:14 +0200 Subject: [PATCH 01/43] build: update to LLVM 21 Closes #20966. --- CMakeLists.txt | 6 +++--- build.zig | 32 +++++++++++++++++--------------- cmake/Findclang.cmake | 9 ++++----- cmake/Findlld.cmake | 38 +++++++++++++++++++------------------- cmake/Findllvm.cmake | 43 +++++++++++++++++++++++-------------------- 5 files changed, 66 insertions(+), 62 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0933a88035..41bc62cead 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,9 +133,9 @@ else() set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig") endif() -find_package(llvm 20) -find_package(clang 20) -find_package(lld 20) +find_package(llvm 21) +find_package(clang 21) +find_package(lld 21) if(ZIG_STATIC_ZLIB) if (MSVC) diff --git a/build.zig b/build.zig index 5696dd44f3..f046e48d86 100644 --- a/build.zig +++ b/build.zig @@ -1176,7 +1176,6 @@ const clang_libs = [_][]const u8{ "clangBasic", "clangEdit", "clangLex", - "clangARCMigrate", "clangRewriteFrontend", "clangRewrite", "clangCrossTU", @@ -1322,30 +1321,31 @@ const llvm_libs = [_][]const u8{ "LLVMOrcTargetProcess", "LLVMOrcShared", "LLVMDWP", + "LLVMDWARFCFIChecker", "LLVMDebugInfoLogicalView", - "LLVMDebugInfoGSYM", "LLVMOption", - "LLVMObjectYAML", "LLVMObjCopy", "LLVMMCA", "LLVMMCDisassembler", "LLVMLTO", - "LLVMPasses", - "LLVMHipStdPar", - "LLVMCFGuard", - "LLVMCoroutines", - "LLVMipo", - "LLVMVectorize", - "LLVMSandboxIR", - "LLVMLinker", - "LLVMInstrumentation", - "LLVMFrontendOpenMP", - "LLVMFrontendOffloading", "LLVMFrontendOpenACC", "LLVMFrontendHLSL", "LLVMFrontendDriver", - "LLVMFrontendAtomic", "LLVMExtensions", + "LLVMPasses", + "LLVMHipStdPar", + "LLVMCoroutines", + "LLVMCFGuard", + "LLVMipo", + "LLVMInstrumentation", + "LLVMVectorize", + "LLVMSandboxIR", + "LLVMLinker", + "LLVMFrontendOpenMP", + "LLVMFrontendDirective", + "LLVMFrontendAtomic", + "LLVMFrontendOffloading", + "LLVMObjectYAML", "LLVMDWARFLinkerParallel", "LLVMDWARFLinkerClassic", "LLVMDWARFLinker", @@ -1374,7 +1374,9 @@ const llvm_libs = [_][]const u8{ "LLVMDebugInfoPDB", "LLVMDebugInfoMSF", "LLVMDebugInfoCodeView", + "LLVMDebugInfoGSYM", "LLVMDebugInfoDWARF", + "LLVMDebugInfoDWARFLowLevel", "LLVMObject", "LLVMTextAPI", "LLVMMCParser", diff --git a/cmake/Findclang.cmake b/cmake/Findclang.cmake index e47d9a0b48..4b7363da9c 100644 --- a/cmake/Findclang.cmake +++ b/cmake/Findclang.cmake @@ -17,10 +17,10 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h if(${LLVM_LINK_MODE} STREQUAL "shared") find_library(CLANG_LIBRARIES NAMES - libclang-cpp.so.20 - libclang-cpp.so.20.1 - clang-cpp-20.0 - clang-cpp200 + libclang-cpp.so.21 + libclang-cpp.so.21.1 + clang-cpp-21.0 + clang-cpp210 clang-cpp NAMES_PER_DIR HINTS "${LLVM_LIBDIRS}" @@ -60,7 +60,6 @@ else() FIND_AND_ADD_CLANG_LIB(clangBasic) FIND_AND_ADD_CLANG_LIB(clangEdit) FIND_AND_ADD_CLANG_LIB(clangLex) - FIND_AND_ADD_CLANG_LIB(clangARCMigrate) FIND_AND_ADD_CLANG_LIB(clangRewriteFrontend) FIND_AND_ADD_CLANG_LIB(clangRewrite) FIND_AND_ADD_CLANG_LIB(clangCrossTU) diff --git a/cmake/Findlld.cmake b/cmake/Findlld.cmake index e6d9ba89ba..61cf1cd883 100644 --- a/cmake/Findlld.cmake +++ b/cmake/Findlld.cmake @@ -9,23 +9,23 @@ find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h HINTS ${LLVM_INCLUDE_DIRS} PATHS - /usr/lib/llvm-20/include - /usr/local/llvm200/include - /usr/local/llvm20/include - 
/usr/local/opt/lld@20/include - /opt/homebrew/opt/lld@20/include - /home/linuxbrew/.linuxbrew/opt/lld@20/include + /usr/lib/llvm-21/include + /usr/local/llvm210/include + /usr/local/llvm21/include + /usr/local/opt/lld@21/include + /opt/homebrew/opt/lld@21/include + /home/linuxbrew/.linuxbrew/opt/lld@21/include /mingw64/include) -find_library(LLD_LIBRARY NAMES lld-20.0 lld200 lld NAMES_PER_DIR +find_library(LLD_LIBRARY NAMES lld-21.0 lld210 lld NAMES_PER_DIR HINTS ${LLVM_LIBDIRS} PATHS - /usr/lib/llvm-20/lib - /usr/local/llvm200/lib - /usr/local/llvm20/lib - /usr/local/opt/lld@20/lib - /opt/homebrew/opt/lld@20/lib - /home/linuxbrew/.linuxbrew/opt/lld@20/lib + /usr/lib/llvm-21/lib + /usr/local/llvm210/lib + /usr/local/llvm21/lib + /usr/local/opt/lld@21/lib + /opt/homebrew/opt/lld@21/lib + /home/linuxbrew/.linuxbrew/opt/lld@21/lib ) if(EXISTS ${LLD_LIBRARY}) set(LLD_LIBRARIES ${LLD_LIBRARY}) @@ -36,12 +36,12 @@ else() HINTS ${LLVM_LIBDIRS} PATHS ${LLD_LIBDIRS} - /usr/lib/llvm-20/lib - /usr/local/llvm200/lib - /usr/local/llvm20/lib - /usr/local/opt/lld@20/lib - /opt/homebrew/opt/lld@20/lib - /home/linuxbrew/.linuxbrew/opt/lld@20/lib + /usr/lib/llvm-21/lib + /usr/local/llvm210/lib + /usr/local/llvm21/lib + /usr/local/opt/lld@21/lib + /opt/homebrew/opt/lld@21/lib + /home/linuxbrew/.linuxbrew/opt/lld@21/lib /mingw64/lib /c/msys64/mingw64/lib c:/msys64/mingw64/lib) diff --git a/cmake/Findllvm.cmake b/cmake/Findllvm.cmake index 5ff5de869c..3ecaaf6809 100644 --- a/cmake/Findllvm.cmake +++ b/cmake/Findllvm.cmake @@ -17,12 +17,12 @@ if(ZIG_USE_LLVM_CONFIG) # terminate when the right LLVM version is not found. unset(LLVM_CONFIG_EXE CACHE) find_program(LLVM_CONFIG_EXE - NAMES llvm-config-20 llvm-config-20.0 llvm-config200 llvm-config20 llvm-config NAMES_PER_DIR + NAMES llvm-config-21 llvm-config-21.0 llvm-config210 llvm-config21 llvm-config NAMES_PER_DIR PATHS "/mingw64/bin" "/c/msys64/mingw64/bin" "c:/msys64/mingw64/bin" - "C:/Libraries/llvm-20.0.0/bin") + "C:/Libraries/llvm-21.0.0/bin") if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND") if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "") @@ -40,9 +40,9 @@ if(ZIG_USE_LLVM_CONFIG) OUTPUT_STRIP_TRAILING_WHITESPACE) get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY) - if("${LLVM_CONFIG_VERSION}" VERSION_LESS 20 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 21 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 21) + if("${LLVM_CONFIG_VERSION}" VERSION_LESS 21 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 22 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 22) # Save the error message, in case this is the last llvm-config we find - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 20.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 21.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") # Ignore this directory and try the search again list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}") @@ -66,9 +66,9 @@ if(ZIG_USE_LLVM_CONFIG) if (LLVM_CONFIG_ERROR) # Save the error message, in case this is the last llvm-config we find if (ZIG_SHARED_LLVM) - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 20.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 21.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library") else() - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 20.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 
21.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library") endif() # Ignore this directory and try the search again @@ -315,30 +315,31 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMOrcTargetProcess) FIND_AND_ADD_LLVM_LIB(LLVMOrcShared) FIND_AND_ADD_LLVM_LIB(LLVMDWP) + FIND_AND_ADD_LLVM_LIB(LLVMDWARFCFIChecker) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoLogicalView) - FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoGSYM) FIND_AND_ADD_LLVM_LIB(LLVMOption) - FIND_AND_ADD_LLVM_LIB(LLVMObjectYAML) FIND_AND_ADD_LLVM_LIB(LLVMObjCopy) FIND_AND_ADD_LLVM_LIB(LLVMMCA) FIND_AND_ADD_LLVM_LIB(LLVMMCDisassembler) FIND_AND_ADD_LLVM_LIB(LLVMLTO) - FIND_AND_ADD_LLVM_LIB(LLVMPasses) - FIND_AND_ADD_LLVM_LIB(LLVMHipStdPar) - FIND_AND_ADD_LLVM_LIB(LLVMCFGuard) - FIND_AND_ADD_LLVM_LIB(LLVMCoroutines) - FIND_AND_ADD_LLVM_LIB(LLVMipo) - FIND_AND_ADD_LLVM_LIB(LLVMVectorize) - FIND_AND_ADD_LLVM_LIB(LLVMSandboxIR) - FIND_AND_ADD_LLVM_LIB(LLVMLinker) - FIND_AND_ADD_LLVM_LIB(LLVMInstrumentation) - FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenMP) - FIND_AND_ADD_LLVM_LIB(LLVMFrontendOffloading) FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenACC) FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL) FIND_AND_ADD_LLVM_LIB(LLVMFrontendDriver) - FIND_AND_ADD_LLVM_LIB(LLVMFrontendAtomic) FIND_AND_ADD_LLVM_LIB(LLVMExtensions) + FIND_AND_ADD_LLVM_LIB(LLVMPasses) + FIND_AND_ADD_LLVM_LIB(LLVMHipStdPar) + FIND_AND_ADD_LLVM_LIB(LLVMCoroutines) + FIND_AND_ADD_LLVM_LIB(LLVMCFGuard) + FIND_AND_ADD_LLVM_LIB(LLVMipo) + FIND_AND_ADD_LLVM_LIB(LLVMInstrumentation) + FIND_AND_ADD_LLVM_LIB(LLVMVectorize) + FIND_AND_ADD_LLVM_LIB(LLVMSandboxIR) + FIND_AND_ADD_LLVM_LIB(LLVMLinker) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenMP) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendDirective) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendAtomic) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendOffloading) + FIND_AND_ADD_LLVM_LIB(LLVMObjectYAML) FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerParallel) FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerClassic) FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker) @@ -367,7 +368,9 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoPDB) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoMSF) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoCodeView) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoGSYM) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARF) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARFLowLevel) FIND_AND_ADD_LLVM_LIB(LLVMObject) FIND_AND_ADD_LLVM_LIB(LLVMTextAPI) FIND_AND_ADD_LLVM_LIB(LLVMMCParser) From b7a8c045efab89c0fab0d3d1c678442c2b373066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Wed, 16 Jul 2025 04:46:03 +0200 Subject: [PATCH 02/43] zig cc: update driver files to LLVM 21 --- src/zig_clang_cc1_main.cpp | 40 ++++++++++++++++++++++++------------ src/zig_clang_cc1as_main.cpp | 25 +++++++++++----------- src/zig_clang_driver.cpp | 24 ++++++++++++++-------- src/zig_llvm-ar.cpp | 2 +- 4 files changed, 57 insertions(+), 34 deletions(-) diff --git a/src/zig_clang_cc1_main.cpp b/src/zig_clang_cc1_main.cpp index 26b5e78cfb..2c17f28621 100644 --- a/src/zig_clang_cc1_main.cpp +++ b/src/zig_clang_cc1_main.cpp @@ -111,9 +111,10 @@ static void ensureSufficientStack() {} /// Print supported cpus of the given target. 
static int PrintSupportedCPUs(std::string TargetStr) { + llvm::Triple Triple(TargetStr); std::string Error; const llvm::Target *TheTarget = - llvm::TargetRegistry::lookupTarget(TargetStr, Error); + llvm::TargetRegistry::lookupTarget(Triple, Error); if (!TheTarget) { llvm::errs() << Error; return 1; @@ -122,15 +123,16 @@ static int PrintSupportedCPUs(std::string TargetStr) { // the target machine will handle the mcpu printing llvm::TargetOptions Options; std::unique_ptr TheTargetMachine( - TheTarget->createTargetMachine(TargetStr, "", "+cpuhelp", Options, + TheTarget->createTargetMachine(Triple, "", "+cpuhelp", Options, std::nullopt)); return 0; } static int PrintSupportedExtensions(std::string TargetStr) { + llvm::Triple Triple(TargetStr); std::string Error; const llvm::Target *TheTarget = - llvm::TargetRegistry::lookupTarget(TargetStr, Error); + llvm::TargetRegistry::lookupTarget(Triple, Error); if (!TheTarget) { llvm::errs() << Error; return 1; @@ -138,7 +140,7 @@ static int PrintSupportedExtensions(std::string TargetStr) { llvm::TargetOptions Options; std::unique_ptr TheTargetMachine( - TheTarget->createTargetMachine(TargetStr, "", "", Options, std::nullopt)); + TheTarget->createTargetMachine(Triple, "", "", Options, std::nullopt)); const llvm::Triple &MachineTriple = TheTargetMachine->getTargetTriple(); const llvm::MCSubtargetInfo *MCInfo = TheTargetMachine->getMCSubtargetInfo(); const llvm::ArrayRef Features = @@ -165,9 +167,10 @@ static int PrintSupportedExtensions(std::string TargetStr) { } static int PrintEnabledExtensions(const TargetOptions& TargetOpts) { + llvm::Triple Triple(TargetOpts.Triple); std::string Error; const llvm::Target *TheTarget = - llvm::TargetRegistry::lookupTarget(TargetOpts.Triple, Error); + llvm::TargetRegistry::lookupTarget(Triple, Error); if (!TheTarget) { llvm::errs() << Error; return 1; @@ -179,7 +182,8 @@ static int PrintEnabledExtensions(const TargetOptions& TargetOpts) { llvm::TargetOptions BackendOptions; std::string FeaturesStr = llvm::join(TargetOpts.FeaturesAsWritten, ","); std::unique_ptr TheTargetMachine( - TheTarget->createTargetMachine(TargetOpts.Triple, TargetOpts.CPU, FeaturesStr, BackendOptions, std::nullopt)); + TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, + BackendOptions, std::nullopt)); const llvm::Triple &MachineTriple = TheTargetMachine->getTargetTriple(); const llvm::MCSubtargetInfo *MCInfo = TheTargetMachine->getMCSubtargetInfo(); @@ -213,11 +217,10 @@ static int PrintEnabledExtensions(const TargetOptions& TargetOpts) { int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { ensureSufficientStack(); - std::unique_ptr Clang(new CompilerInstance()); IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); // Register the support for object-file-wrapped Clang modules. - auto PCHOps = Clang->getPCHContainerOperations(); + auto PCHOps = std::make_shared(); PCHOps->registerWriter(std::make_unique()); PCHOps->registerReader(std::make_unique()); @@ -229,17 +232,21 @@ int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { // Buffer diagnostics from argument parsing so that we can output them using a // well formed diagnostic object. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); + DiagnosticOptions DiagOpts; TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); + DiagnosticsEngine Diags(DiagID, DiagOpts, DiagsBuffer); // Setup round-trip remarks for the DiagnosticsEngine used in CreateFromArgs. 
if (find(Argv, StringRef("-Rround-trip-cc1-args")) != Argv.end()) Diags.setSeverity(diag::remark_cc1_round_trip_generated, diag::Severity::Remark, {}); - bool Success = CompilerInvocation::CreateFromArgs(Clang->getInvocation(), - Argv, Diags, Argv0); + auto Invocation = std::make_shared(); + bool Success = + CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags, Argv0); + + auto Clang = std::make_unique(std::move(Invocation), + std::move(PCHOps)); if (!Clang->getFrontendOpts().TimeTracePath.empty()) { llvm::timeTraceProfilerInitialize( @@ -292,7 +299,14 @@ int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { // If any timers were active but haven't been destroyed yet, print their // results now. This happens in -disable-free mode. - llvm::TimerGroup::printAll(llvm::errs()); + std::unique_ptr IOFile = llvm::CreateInfoOutputFile(); + if (Clang->getCodeGenOpts().TimePassesJson) { + *IOFile << "{\n"; + llvm::TimerGroup::printAllJSONValues(*IOFile, ""); + *IOFile << "\n}\n"; + } else { + llvm::TimerGroup::printAll(*IOFile); + } llvm::TimerGroup::clearAll(); if (llvm::timeTraceProfilerEnabled()) { diff --git a/src/zig_clang_cc1as_main.cpp b/src/zig_clang_cc1as_main.cpp index 7fe97cc6e6..f938e7e404 100644 --- a/src/zig_clang_cc1as_main.cpp +++ b/src/zig_clang_cc1as_main.cpp @@ -26,6 +26,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" @@ -541,8 +542,8 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, // FIXME: There is a bit of code duplication with addPassesToEmitFile. if (Opts.OutputType == AssemblerInvocation::FT_Asm) { - MCInstPrinter *IP = TheTarget->createMCInstPrinter( - llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); + std::unique_ptr IP(TheTarget->createMCInstPrinter( + llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI)); std::unique_ptr CE; if (Opts.ShowEncoding) @@ -551,7 +552,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); auto FOut = std::make_unique(*Out); - Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), IP, + Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), std::move(IP), std::move(CE), std::move(MAB))); } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { Str.reset(createNullStreamer(Ctx)); @@ -576,7 +577,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, Triple T(Opts.Triple); Str.reset(TheTarget->createMCObjectStreamer( T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI)); - Str.get()->initSections(Opts.NoExecStack, *STI); + Str->initSections(Opts.NoExecStack, *STI); if (T.isOSBinFormatMachO() && T.isOSDarwin()) { Triple *TVT = Opts.DarwinTargetVariantTriple ? &*Opts.DarwinTargetVariantTriple @@ -591,14 +592,14 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, if (Opts.EmbedBitcode && Ctx.getObjectFileType() == MCContext::IsMachO) { MCSection *AsmLabel = Ctx.getMachOSection( "__LLVM", "__asm", MachO::S_REGULAR, 4, SectionKind::getReadOnly()); - Str.get()->switchSection(AsmLabel); - Str.get()->emitZeros(1); + Str->switchSection(AsmLabel); + Str->emitZeros(1); } bool Failed = false; std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); + createMCAsmParser(SrcMgr, Ctx, *Str, *MAI)); // FIXME: init MCTargetOptions from sanitizer flags here. 
std::unique_ptr TAP( @@ -618,7 +619,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, } if (!Failed) { - Parser->setTargetParser(*TAP.get()); + Parser->setTargetParser(*TAP); Failed = Parser->Run(Opts.NoInitialTextSection); } @@ -657,12 +658,12 @@ int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { InitializeAllAsmParsers(); // Construct our diagnostic client. - IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(errs(), &*DiagOpts); + DiagnosticOptions DiagOpts; + TextDiagnosticPrinter *DiagClient = + new TextDiagnosticPrinter(errs(), DiagOpts); DiagClient->setPrefix("clang -cc1as"); IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); + DiagnosticsEngine Diags(DiagID, DiagOpts, DiagClient); // Set an error handler, so that any LLVM backend diagnostics go through our // error handler. diff --git a/src/zig_clang_driver.cpp b/src/zig_clang_driver.cpp index 306a898ec9..7f4c6034be 100644 --- a/src/zig_clang_driver.cpp +++ b/src/zig_clang_driver.cpp @@ -153,6 +153,11 @@ static bool SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { } const char *FilteringStr = ::getenv("CC_PRINT_HEADERS_FILTERING"); + if (!FilteringStr) { + TheDriver.Diag(clang::diag::err_drv_print_header_env_var_invalid_format) + << EnvVar; + return false; + } HeaderIncludeFilteringKind Filtering; if (!stringToHeaderIncludeFiltering(FilteringStr, Filtering)) { TheDriver.Diag(clang::diag::err_drv_print_header_env_var) @@ -163,7 +168,7 @@ static bool SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { if ((TheDriver.CCPrintHeadersFormat == HIFMT_Textual && Filtering != HIFIL_None) || (TheDriver.CCPrintHeadersFormat == HIFMT_JSON && - Filtering != HIFIL_Only_Direct_System)) { + Filtering == HIFIL_None)) { TheDriver.Diag(clang::diag::err_drv_print_header_env_var_combination) << EnvVar << FilteringStr; return false; @@ -295,7 +300,7 @@ static int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContex if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { // FIXME: Driver shouldn't take extra initial argument. driver::applyOverrideOptions(Args, OverrideStr, SavedStrings, - &llvm::errs()); + "CCC_OVERRIDE_OPTIONS", &llvm::errs()); } std::string Path = GetExecutablePath(ToolContext.Path, CanonicalPrefixes); @@ -311,21 +316,24 @@ static int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContex .Case("-fintegrated-cc1", false) .Default(UseNewCC1Process); - IntrusiveRefCntPtr DiagOpts = - CreateAndPopulateDiagOpts(Args); + std::unique_ptr DiagOpts = CreateAndPopulateDiagOpts(Args); + // Driver's diagnostics don't use suppression mappings, so don't bother + // parsing them. CC1 still receives full args, so this doesn't impact other + // actions. 
+ DiagOpts->DiagnosticSuppressionMappingsFile.clear(); - TextDiagnosticPrinter *DiagClient - = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); + TextDiagnosticPrinter *DiagClient = + new TextDiagnosticPrinter(llvm::errs(), *DiagOpts); FixupDiagPrefixExeName(DiagClient, ProgName); IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); - DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); + DiagnosticsEngine Diags(DiagID, *DiagOpts, DiagClient); if (!DiagOpts->DiagnosticSerializationFile.empty()) { auto SerializedConsumer = clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, - &*DiagOpts, /*MergeChildRecords=*/true); + *DiagOpts, /*MergeChildRecords=*/true); Diags.setClient(new ChainedDiagnosticConsumer( Diags.takeClient(), std::move(SerializedConsumer))); } diff --git a/src/zig_llvm-ar.cpp b/src/zig_llvm-ar.cpp index 7353ca5616..50478e8dda 100644 --- a/src/zig_llvm-ar.cpp +++ b/src/zig_llvm-ar.cpp @@ -130,7 +130,7 @@ MODIFIERS: << "USAGE: " + ToolName + " [options] [-][modifiers] [relpos] " "[count] [files]\n" - << " " + ToolName + " -M [ Date: Wed, 16 Jul 2025 04:52:10 +0200 Subject: [PATCH 03/43] zig cc: update intrinsic headers to LLVM 21 --- lib/include/__clang_spirv_builtins.h | 217 + lib/include/__stdarg_va_arg.h | 4 +- lib/include/altivec.h | 161 +- lib/include/amxavx512intrin.h | 2 +- lib/include/amxcomplexintrin.h | 10 +- lib/include/amxmovrstransposeintrin.h | 2 +- lib/include/amxtf32transposeintrin.h | 2 +- lib/include/andes_vector.h | 16 + lib/include/arm_acle.h | 52 +- lib/include/arm_fp16.h | 178 +- lib/include/arm_neon.h | 42776 ++++++++++++---------- lib/include/arm_sme.h | 694 + lib/include/arm_sve.h | 4540 +-- lib/include/avx10_2_512bf16intrin.h | 16 +- lib/include/avx10_2_512convertintrin.h | 52 +- lib/include/avx10_2_512satcvtdsintrin.h | 52 +- lib/include/avx10_2_512satcvtintrin.h | 168 +- lib/include/avx10_2bf16intrin.h | 32 +- lib/include/avx10_2convertintrin.h | 2905 +- lib/include/avx10_2minmaxintrin.h | 81 +- lib/include/avx10_2niintrin.h | 1666 - lib/include/avx10_2satcvtdsintrin.h | 320 +- lib/include/avx10_2satcvtintrin.h | 356 +- lib/include/avx512fp16intrin.h | 3 +- lib/include/bmiintrin.h | 4 - lib/include/cpuid.h | 46 +- lib/include/float.h | 13 +- lib/include/immintrin.h | 224 - lib/include/intrin.h | 25 +- lib/include/keylockerintrin.h | 9 - lib/include/llvm_libc_wrappers/stdlib.h | 2 +- lib/include/lzcntintrin.h | 10 +- lib/include/module.modulemap | 13 + lib/include/movrsintrin.h | 2 +- lib/include/prfchwintrin.h | 23 +- lib/include/ptrauth.h | 57 + lib/include/riscv_corev_alu.h | 8 +- lib/include/riscv_vector.h | 4 - lib/include/shaintrin.h | 5 +- lib/include/stdcountof.h | 15 + lib/include/stdint.h | 147 +- lib/include/vecintrin.h | 5 + lib/include/x86gprintrin.h | 14 - lib/include/x86intrin.h | 18 - lib/include/xmmintrin.h | 5 +- 45 files changed, 29569 insertions(+), 25385 deletions(-) create mode 100644 lib/include/__clang_spirv_builtins.h create mode 100644 lib/include/andes_vector.h create mode 100644 lib/include/stdcountof.h diff --git a/lib/include/__clang_spirv_builtins.h b/lib/include/__clang_spirv_builtins.h new file mode 100644 index 0000000000..9915cdfcae --- /dev/null +++ b/lib/include/__clang_spirv_builtins.h @@ -0,0 +1,217 @@ +/*===---- spirv_builtin_vars.h - SPIR-V built-in ---------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __SPIRV_BUILTIN_VARS_H +#define __SPIRV_BUILTIN_VARS_H + +#if __cplusplus >= 201103L +#define __SPIRV_NOEXCEPT noexcept +#else +#define __SPIRV_NOEXCEPT +#endif + +#pragma push_macro("__size_t") +#pragma push_macro("__uint32_t") +#pragma push_macro("__uint64_t") +#define __size_t __SIZE_TYPE__ +#define __uint32_t __UINT32_TYPE__ + +#define __SPIRV_overloadable __attribute__((overloadable)) +#define __SPIRV_convergent __attribute__((convergent)) +#define __SPIRV_inline __attribute__((always_inline)) + +#define __global __attribute__((opencl_global)) +#define __local __attribute__((opencl_local)) +#define __private __attribute__((opencl_private)) +#define __constant __attribute__((opencl_constant)) +#ifdef __SYCL_DEVICE_ONLY__ +#define __generic +#else +#define __generic __attribute__((opencl_generic)) +#endif + +// Check if SPIR-V builtins are supported. +// As the translator doesn't use the LLVM intrinsics (which would be emitted if +// we use the SPIR-V builtins) we can't rely on the SPIRV32/SPIRV64 etc macros +// to establish if we can use the builtin alias. We disable builtin altogether +// if we do not intent to use the backend. So instead of use target macros, rely +// on a __has_builtin test. +#if (__has_builtin(__builtin_spirv_num_workgroups)) +#define __SPIRV_BUILTIN_ALIAS(builtin) \ + __attribute__((clang_builtin_alias(builtin))) +#else +#define __SPIRV_BUILTIN_ALIAS(builtin) +#endif + +// Builtin IDs and sizes + +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_workgroups) __size_t + __spirv_NumWorkgroups(int); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_size) __size_t + __spirv_WorkgroupSize(int); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_workgroup_id) __size_t + __spirv_WorkgroupId(int); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_local_invocation_id) __size_t + __spirv_LocalInvocationId(int); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_invocation_id) __size_t + __spirv_GlobalInvocationId(int); + +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_size) __size_t + __spirv_GlobalSize(int); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_global_offset) __size_t + __spirv_GlobalOffset(int); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_size) __uint32_t + __spirv_SubgroupSize(); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_max_size) __uint32_t + __spirv_SubgroupMaxSize(); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_num_subgroups) __uint32_t + __spirv_NumSubgroups(); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_id) __uint32_t + __spirv_SubgroupId(); +extern __SPIRV_BUILTIN_ALIAS(__builtin_spirv_subgroup_local_invocation_id) + __uint32_t __spirv_SubgroupLocalInvocationId(); + +// OpGenericCastToPtrExplicit + +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__global void *__spirv_GenericCastToPtrExplicit_ToGlobal(__generic void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__global const void * +__spirv_GenericCastToPtrExplicit_ToGlobal(__generic const void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__global volatile void * +__spirv_GenericCastToPtrExplicit_ToGlobal(__generic volatile void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable 
+__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__global const volatile void * +__spirv_GenericCastToPtrExplicit_ToGlobal(__generic const volatile void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__local void *__spirv_GenericCastToPtrExplicit_ToLocal(__generic void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__local const void * +__spirv_GenericCastToPtrExplicit_ToLocal(__generic const void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__local volatile void * +__spirv_GenericCastToPtrExplicit_ToLocal(__generic volatile void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__local const volatile void * +__spirv_GenericCastToPtrExplicit_ToLocal(__generic const volatile void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__private void * +__spirv_GenericCastToPtrExplicit_ToPrivate(__generic void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__private const void * +__spirv_GenericCastToPtrExplicit_ToPrivate(__generic const void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__private volatile void * +__spirv_GenericCastToPtrExplicit_ToPrivate(__generic volatile void *, + int) __SPIRV_NOEXCEPT; +extern __SPIRV_overloadable +__SPIRV_BUILTIN_ALIAS(__builtin_spirv_generic_cast_to_ptr_explicit) +__private const volatile void * +__spirv_GenericCastToPtrExplicit_ToPrivate(__generic const volatile void *, + int) __SPIRV_NOEXCEPT; + +// OpGenericCastToPtr + +static __SPIRV_overloadable __SPIRV_inline __global void * +__spirv_GenericCastToPtr_ToGlobal(__generic void *p, int) __SPIRV_NOEXCEPT { + return (__global void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __global const void * +__spirv_GenericCastToPtr_ToGlobal(__generic const void *p, + int) __SPIRV_NOEXCEPT { + return (__global const void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __global volatile void * +__spirv_GenericCastToPtr_ToGlobal(__generic volatile void *p, + int) __SPIRV_NOEXCEPT { + return (__global volatile void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __global const volatile void * +__spirv_GenericCastToPtr_ToGlobal(__generic const volatile void *p, + int) __SPIRV_NOEXCEPT { + return (__global const volatile void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __local void * +__spirv_GenericCastToPtr_ToLocal(__generic void *p, int) __SPIRV_NOEXCEPT { + return (__local void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __local const void * +__spirv_GenericCastToPtr_ToLocal(__generic const void *p, + int) __SPIRV_NOEXCEPT { + return (__local const void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __local volatile void * +__spirv_GenericCastToPtr_ToLocal(__generic volatile void *p, + int) __SPIRV_NOEXCEPT { + return (__local volatile void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __local const volatile void * +__spirv_GenericCastToPtr_ToLocal(__generic const volatile void *p, + int) __SPIRV_NOEXCEPT { + return (__local const volatile void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __private void 
* +__spirv_GenericCastToPtr_ToPrivate(__generic void *p, int) __SPIRV_NOEXCEPT { + return (__private void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __private const void * +__spirv_GenericCastToPtr_ToPrivate(__generic const void *p, + int) __SPIRV_NOEXCEPT { + return (__private const void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __private volatile void * +__spirv_GenericCastToPtr_ToPrivate(__generic volatile void *p, + int) __SPIRV_NOEXCEPT { + return (__private volatile void *)p; +} +static __SPIRV_overloadable __SPIRV_inline __private const volatile void * +__spirv_GenericCastToPtr_ToPrivate(__generic const volatile void *p, + int) __SPIRV_NOEXCEPT { + return (__private const volatile void *)p; +} + +#pragma pop_macro("__size_t") +#pragma pop_macro("__uint32_t") +#pragma pop_macro("__uint64_t") + +#undef __SPIRV_overloadable +#undef __SPIRV_convergent +#undef __SPIRV_inline + +#undef __global +#undef __local +#undef __constant +#undef __generic + +#undef __SPIRV_BUILTIN_ALIAS +#undef __SPIRV_NOEXCEPT + +#endif /* __SPIRV_BUILTIN_VARS_H */ diff --git a/lib/include/__stdarg_va_arg.h b/lib/include/__stdarg_va_arg.h index 89bd2f65d3..ebdb6f9d4b 100644 --- a/lib/include/__stdarg_va_arg.h +++ b/lib/include/__stdarg_va_arg.h @@ -10,8 +10,8 @@ #ifndef va_arg #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L -/* C23 does not require the second parameter for va_start. */ -#define va_start(ap, ...) __builtin_va_start(ap, 0) +/* C23 uses a special builtin. */ +#define va_start(...) __builtin_c23_va_start(__VA_ARGS__) #else /* Versions before C23 do require the second parameter. */ #define va_start(ap, param) __builtin_va_start(ap, param) diff --git a/lib/include/altivec.h b/lib/include/altivec.h index 8da6505501..71d8d3c0c0 100644 --- a/lib/include/altivec.h +++ b/lib/include/altivec.h @@ -17525,70 +17525,73 @@ vec_bperm(vector unsigned long long __a, vector unsigned char __b) { /* vec_reve */ -static inline __ATTRS_o_ai vector bool char vec_reve(vector bool char __a) { +static __inline__ __ATTRS_o_ai vector bool char vec_reve(vector bool char __a) { return __builtin_shufflevector(__a, __a, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector signed char vec_reve(vector signed char __a) { +static __inline__ __ATTRS_o_ai vector signed char +vec_reve(vector signed char __a) { return __builtin_shufflevector(__a, __a, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector unsigned char +static __inline__ __ATTRS_o_ai vector unsigned char vec_reve(vector unsigned char __a) { return __builtin_shufflevector(__a, __a, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector bool int vec_reve(vector bool int __a) { +static __inline__ __ATTRS_o_ai vector bool int vec_reve(vector bool int __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector signed int vec_reve(vector signed int __a) { +static __inline__ __ATTRS_o_ai vector signed int +vec_reve(vector signed int __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector unsigned int +static __inline__ __ATTRS_o_ai vector unsigned int vec_reve(vector unsigned int __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector bool short vec_reve(vector bool short __a) { +static __inline__ __ATTRS_o_ai vector bool short +vec_reve(vector bool short __a) { return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 
3, 2, 1, 0); } -static inline __ATTRS_o_ai vector signed short +static __inline__ __ATTRS_o_ai vector signed short vec_reve(vector signed short __a) { return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector unsigned short +static __inline__ __ATTRS_o_ai vector unsigned short vec_reve(vector unsigned short __a) { return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } -static inline __ATTRS_o_ai vector float vec_reve(vector float __a) { +static __inline__ __ATTRS_o_ai vector float vec_reve(vector float __a) { return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } #ifdef __VSX__ -static inline __ATTRS_o_ai vector bool long long +static __inline__ __ATTRS_o_ai vector bool long long vec_reve(vector bool long long __a) { return __builtin_shufflevector(__a, __a, 1, 0); } -static inline __ATTRS_o_ai vector signed long long +static __inline__ __ATTRS_o_ai vector signed long long vec_reve(vector signed long long __a) { return __builtin_shufflevector(__a, __a, 1, 0); } -static inline __ATTRS_o_ai vector unsigned long long +static __inline__ __ATTRS_o_ai vector unsigned long long vec_reve(vector unsigned long long __a) { return __builtin_shufflevector(__a, __a, 1, 0); } -static inline __ATTRS_o_ai vector double vec_reve(vector double __a) { +static __inline__ __ATTRS_o_ai vector double vec_reve(vector double __a) { return __builtin_shufflevector(__a, __a, 1, 0); } #endif @@ -17721,42 +17724,42 @@ typedef vector signed int unaligned_vec_sint __attribute__((aligned(1))); typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1))); typedef vector float unaligned_vec_float __attribute__((aligned(1))); -static inline __ATTRS_o_ai vector signed char vec_xl(ptrdiff_t __offset, - const signed char *__ptr) { +static __inline__ __ATTRS_o_ai vector signed char +vec_xl(ptrdiff_t __offset, const signed char *__ptr) { return *(unaligned_vec_schar *)(__ptr + __offset); } -static inline __ATTRS_o_ai vector unsigned char +static __inline__ __ATTRS_o_ai vector unsigned char vec_xl(ptrdiff_t __offset, const unsigned char *__ptr) { return *(unaligned_vec_uchar*)(__ptr + __offset); } -static inline __ATTRS_o_ai vector signed short +static __inline__ __ATTRS_o_ai vector signed short vec_xl(ptrdiff_t __offset, const signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sshort *)__addr; } -static inline __ATTRS_o_ai vector unsigned short +static __inline__ __ATTRS_o_ai vector unsigned short vec_xl(ptrdiff_t __offset, const unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ushort *)__addr; } -static inline __ATTRS_o_ai vector signed int vec_xl(ptrdiff_t __offset, - const signed int *__ptr) { +static __inline__ __ATTRS_o_ai vector signed int +vec_xl(ptrdiff_t __offset, const signed int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sint *)__addr; } -static inline __ATTRS_o_ai vector unsigned int +static __inline__ __ATTRS_o_ai vector unsigned int vec_xl(ptrdiff_t __offset, const unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_uint *)__addr; } -static inline __ATTRS_o_ai vector float vec_xl(ptrdiff_t __offset, - const float *__ptr) { +static __inline__ __ATTRS_o_ai vector float vec_xl(ptrdiff_t __offset, + const float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_float *)__addr; } @@ -17766,20 +17769,20 @@ typedef vector 
signed long long unaligned_vec_sll __attribute__((aligned(1))); typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1))); typedef vector double unaligned_vec_double __attribute__((aligned(1))); -static inline __ATTRS_o_ai vector signed long long +static __inline__ __ATTRS_o_ai vector signed long long vec_xl(ptrdiff_t __offset, const signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sll *)__addr; } -static inline __ATTRS_o_ai vector unsigned long long +static __inline__ __ATTRS_o_ai vector unsigned long long vec_xl(ptrdiff_t __offset, const unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ull *)__addr; } -static inline __ATTRS_o_ai vector double vec_xl(ptrdiff_t __offset, - const double *__ptr) { +static __inline__ __ATTRS_o_ai vector double vec_xl(ptrdiff_t __offset, + const double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_double *)__addr; } @@ -17790,13 +17793,13 @@ static inline __ATTRS_o_ai vector double vec_xl(ptrdiff_t __offset, typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1))); typedef vector unsigned __int128 unaligned_vec_ui128 __attribute__((aligned(1))); -static inline __ATTRS_o_ai vector signed __int128 +static __inline__ __ATTRS_o_ai vector signed __int128 vec_xl(ptrdiff_t __offset, const signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_si128 *)__addr; } -static inline __ATTRS_o_ai vector unsigned __int128 +static __inline__ __ATTRS_o_ai vector unsigned __int128 vec_xl(ptrdiff_t __offset, const unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ui128 *)__addr; @@ -17991,64 +17994,64 @@ vec_load_splats(unsigned long long __offset, const float *__ptr) { #define vec_xstd2 vec_xst #define vec_xstw4 vec_xst -static inline __ATTRS_o_ai void +static __inline__ __ATTRS_o_ai void vec_xst(vector signed char __vec, ptrdiff_t __offset, signed char *__ptr) { *(unaligned_vec_schar *)(__ptr + __offset) = __vec; } -static inline __ATTRS_o_ai void +static __inline__ __ATTRS_o_ai void vec_xst(vector unsigned char __vec, ptrdiff_t __offset, unsigned char *__ptr) { *(unaligned_vec_uchar *)(__ptr + __offset) = __vec; } -static inline __ATTRS_o_ai void +static __inline__ __ATTRS_o_ai void vec_xst(vector signed short __vec, ptrdiff_t __offset, signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_sshort *)__addr = __vec; } -static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec, - ptrdiff_t __offset, - unsigned short *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst(vector unsigned short __vec, + ptrdiff_t __offset, + unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_ushort *)__addr = __vec; } -static inline __ATTRS_o_ai void vec_xst(vector signed int __vec, - ptrdiff_t __offset, signed int *__ptr) { +static __inline__ __ATTRS_o_ai void +vec_xst(vector signed int __vec, ptrdiff_t __offset, signed int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_sint *)__addr = __vec; } -static inline __ATTRS_o_ai void +static __inline__ __ATTRS_o_ai void vec_xst(vector unsigned int __vec, ptrdiff_t __offset, unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_uint *)__addr = __vec; } -static inline __ATTRS_o_ai void 
vec_xst(vector float __vec, ptrdiff_t __offset, - float *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst(vector float __vec, + ptrdiff_t __offset, float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_float *)__addr = __vec; } #ifdef __VSX__ -static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec, - ptrdiff_t __offset, - signed long long *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst(vector signed long long __vec, + ptrdiff_t __offset, + signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_sll *)__addr = __vec; } -static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec, - ptrdiff_t __offset, - unsigned long long *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst(vector unsigned long long __vec, + ptrdiff_t __offset, + unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_ull *)__addr = __vec; } -static inline __ATTRS_o_ai void vec_xst(vector double __vec, ptrdiff_t __offset, - double *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst(vector double __vec, + ptrdiff_t __offset, double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_double *)__addr = __vec; } @@ -18056,16 +18059,16 @@ static inline __ATTRS_o_ai void vec_xst(vector double __vec, ptrdiff_t __offset, #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) && \ defined(__SIZEOF_INT128__) -static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec, - ptrdiff_t __offset, - signed __int128 *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst(vector signed __int128 __vec, + ptrdiff_t __offset, + signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_si128 *)__addr = __vec; } -static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, - ptrdiff_t __offset, - unsigned __int128 *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, + ptrdiff_t __offset, + unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; *(unaligned_vec_ui128 *)__addr = __vec; } @@ -18075,51 +18078,51 @@ static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, #if defined(__POWER10_VECTOR__) && defined(__VSX__) && \ defined(__SIZEOF_INT128__) -static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, - ptrdiff_t __offset, - signed char *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + ptrdiff_t __offset, + signed char *__ptr) { *(__ptr + __offset) = (signed char)__vec[0]; } -static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, - ptrdiff_t __offset, - unsigned char *__ptr) { +static __inline__ __ATTRS_o_ai void +vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, + unsigned char *__ptr) { *(__ptr + __offset) = (unsigned char)__vec[0]; } -static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, - ptrdiff_t __offset, - signed short *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + ptrdiff_t __offset, + signed short *__ptr) { *(__ptr + __offset) = (signed short)__vec[0]; } -static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, - ptrdiff_t __offset, - unsigned short *__ptr) { +static __inline__ __ATTRS_o_ai void +vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, + unsigned short *__ptr) { *(__ptr + __offset) = (unsigned 
short)__vec[0]; } -static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, - ptrdiff_t __offset, - signed int *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + ptrdiff_t __offset, + signed int *__ptr) { *(__ptr + __offset) = (signed int)__vec[0]; } -static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, - ptrdiff_t __offset, - unsigned int *__ptr) { +static __inline__ __ATTRS_o_ai void +vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, + unsigned int *__ptr) { *(__ptr + __offset) = (unsigned int)__vec[0]; } -static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, - ptrdiff_t __offset, - signed long long *__ptr) { +static __inline__ __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + ptrdiff_t __offset, + signed long long *__ptr) { *(__ptr + __offset) = (signed long long)__vec[0]; } -static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, - ptrdiff_t __offset, - unsigned long long *__ptr) { +static __inline__ __ATTRS_o_ai void +vec_xst_trunc(vector unsigned __int128 __vec, ptrdiff_t __offset, + unsigned long long *__ptr) { *(__ptr + __offset) = (unsigned long long)__vec[0]; } #endif diff --git a/lib/include/amxavx512intrin.h b/lib/include/amxavx512intrin.h index a158983482..bbde44fc26 100644 --- a/lib/include/amxavx512intrin.h +++ b/lib/include/amxavx512intrin.h @@ -228,7 +228,7 @@ /// dst.byte[i] := a.row[row_index].byte[row_chunk+i] /// ENDFOR /// \endcode -#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b) +#define _tile_movrow(a, b) ((__m512i)__builtin_ia32_tilemovrow(a, b)) /// This is internal intrinsic. C/C++ user should avoid calling it directly. diff --git a/lib/include/amxcomplexintrin.h b/lib/include/amxcomplexintrin.h index 84ef972fca..87ee8f3919 100644 --- a/lib/include/amxcomplexintrin.h +++ b/lib/include/amxcomplexintrin.h @@ -135,9 +135,8 @@ _tile_cmmrlfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS_COMPLEX -static void __tile_cmmimfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { +static __inline__ void __DEFAULT_FN_ATTRS_COMPLEX +__tile_cmmimfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_cmmimfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } @@ -158,9 +157,8 @@ static void __tile_cmmimfp16ps(__tile1024i *dst, __tile1024i src0, /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. 
-__DEFAULT_FN_ATTRS_COMPLEX -static void __tile_cmmrlfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { +static __inline__ void __DEFAULT_FN_ATTRS_COMPLEX +__tile_cmmrlfp16ps(__tile1024i *dst, __tile1024i src0, __tile1024i src1) { dst->tile = _tile_cmmrlfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, src0.tile, src1.tile); } diff --git a/lib/include/amxmovrstransposeintrin.h b/lib/include/amxmovrstransposeintrin.h index 17a9f7506a..5f48cba949 100644 --- a/lib/include/amxmovrstransposeintrin.h +++ b/lib/include/amxmovrstransposeintrin.h @@ -197,4 +197,4 @@ static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1, #undef __DEFAULT_FN_ATTRS #endif /* __x86_64__ */ -#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */ \ No newline at end of file +#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */ diff --git a/lib/include/amxtf32transposeintrin.h b/lib/include/amxtf32transposeintrin.h index 60336f953e..e1b90c1adf 100644 --- a/lib/include/amxtf32transposeintrin.h +++ b/lib/include/amxtf32transposeintrin.h @@ -8,7 +8,7 @@ */ #ifndef __IMMINTRIN_H #error \ - "Never use directly; include instead." + "Never use directly; include instead." #endif // __IMMINTRIN_H #ifndef __AMX_TF32TRANSPOSEINTRIN_H diff --git a/lib/include/andes_vector.h b/lib/include/andes_vector.h new file mode 100644 index 0000000000..dc717e6d80 --- /dev/null +++ b/lib/include/andes_vector.h @@ -0,0 +1,16 @@ +//===----- andes_vector.h - Andes Vector definitions ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _ANDES_VECTOR_H_ +#define _ANDES_VECTOR_H_ + +#include "riscv_vector.h" + +#pragma clang riscv intrinsic andes_vector + +#endif //_ANDES_VECTOR_H_ diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h index b1dc90f84a..5cfa3d023a 100644 --- a/lib/include/arm_acle.h +++ b/lib/include/arm_acle.h @@ -29,47 +29,16 @@ extern "C" { /* 7 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */ /* 7.3 Memory barriers */ -#if !__has_builtin(__dmb) -#define __dmb(i) __builtin_arm_dmb(i) -#endif -#if !__has_builtin(__dsb) -#define __dsb(i) __builtin_arm_dsb(i) -#endif -#if !__has_builtin(__isb) -#define __isb(i) __builtin_arm_isb(i) -#endif +void __dmb(unsigned int); +void __dsb(unsigned int); +void __isb(unsigned int); /* 7.4 Hints */ - -#if !__has_builtin(__wfi) -static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) { - __builtin_arm_wfi(); -} -#endif - -#if !__has_builtin(__wfe) -static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) { - __builtin_arm_wfe(); -} -#endif - -#if !__has_builtin(__sev) -static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) { - __builtin_arm_sev(); -} -#endif - -#if !__has_builtin(__sevl) -static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) { - __builtin_arm_sevl(); -} -#endif - -#if !__has_builtin(__yield) -static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) { - __builtin_arm_yield(); -} -#endif +void __wfi(void); +void __wfe(void); +void __sev(void); +void __sevl(void); +void __yield(void); #if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE #define __dbg(t) __builtin_arm_dbg(t) @@ -872,8 +841,9 @@ __gcspopm() { return __builtin_arm_gcspopm(0); } -static 
__inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs"))) -__gcsss(const void *__stack) { +static __inline__ void *__attribute__((__always_inline__, __nodebug__, + target("gcs"))) +__gcsss(void *__stack) { return __builtin_arm_gcsss(__stack); } #endif diff --git a/lib/include/arm_fp16.h b/lib/include/arm_fp16.h index 2dd0653ab0..f48e44ce8a 100644 --- a/lib/include/arm_fp16.h +++ b/lib/include/arm_fp16.h @@ -34,408 +34,408 @@ typedef __fp16 float16_t; float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vabdh_f16(__s0, __s1)); \ __ret; \ }) #define vabsh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vabsh_f16(__s0)); \ __ret; \ }) #define vaddh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vaddh_f16(__s0, __s1)); \ __ret; \ }) #define vcageh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcageh_f16(__s0, __s1)); \ __ret; \ }) #define vcagth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcagth_f16(__s0, __s1)); \ __ret; \ }) #define vcaleh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcaleh_f16(__s0, __s1)); \ __ret; \ }) #define vcalth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcalth_f16(__s0, __s1)); \ __ret; \ }) #define vceqh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vceqh_f16(__s0, __s1)); \ __ret; \ }) #define vceqzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vceqzh_f16(__s0)); \ __ret; \ }) #define vcgeh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgeh_f16(__s0, __s1)); \ __ret; \ }) #define vcgezh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgezh_f16(__s0)); \ __ret; \ }) #define vcgth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgth_f16(__s0, 
__s1)); \ __ret; \ }) #define vcgtzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcgtzh_f16(__s0)); \ __ret; \ }) #define vcleh_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcleh_f16(__s0, __s1)); \ __ret; \ }) #define vclezh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vclezh_f16(__s0)); \ __ret; \ }) #define vclth_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vclth_f16(__s0, __s1)); \ __ret; \ }) #define vcltzh_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcltzh_f16(__s0)); \ __ret; \ }) #define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvth_n_s16_f16(__s0, __p1)); \ __ret; \ }) #define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvth_n_s32_f16(__s0, __p1)); \ __ret; \ }) #define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvth_n_s64_f16(__s0, __p1)); \ __ret; \ }) #define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvth_n_u16_f16(__s0, __p1)); \ __ret; \ }) #define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvth_n_u32_f16(__s0, __p1)); \ __ret; \ }) #define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvth_n_u64_f16(__s0, __p1)); \ __ret; \ }) #define vcvth_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvth_s16_f16(__s0)); \ __ret; \ }) #define vcvth_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvth_s32_f16(__s0)); \ __ret; \ }) #define vcvth_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvth_s64_f16(__s0)); \ __ret; \ }) #define 
vcvth_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvth_u16_f16(__s0)); \ __ret; \ }) #define vcvth_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvth_u32_f16(__s0)); \ __ret; \ }) #define vcvth_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvth_u64_f16(__s0)); \ __ret; \ }) #define vcvtah_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtah_s16_f16(__s0)); \ __ret; \ }) #define vcvtah_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtah_s32_f16(__s0)); \ __ret; \ }) #define vcvtah_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtah_s64_f16(__s0)); \ __ret; \ }) #define vcvtah_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtah_u16_f16(__s0)); \ __ret; \ }) #define vcvtah_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtah_u32_f16(__s0)); \ __ret; \ }) #define vcvtah_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtah_u64_f16(__s0)); \ __ret; \ }) #define vcvth_f16_u16(__p0) __extension__ ({ \ float16_t __ret; \ uint16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_u16(__s0)); \ __ret; \ }) #define vcvth_f16_s16(__p0) __extension__ ({ \ float16_t __ret; \ int16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_s16(__s0)); \ __ret; \ }) #define vcvth_f16_u32(__p0) __extension__ ({ \ float16_t __ret; \ uint32_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_u32(__s0)); \ __ret; \ }) #define vcvth_f16_s32(__p0) __extension__ ({ \ float16_t __ret; \ int32_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_s32(__s0)); \ __ret; \ }) #define vcvth_f16_u64(__p0) __extension__ ({ \ float16_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_f16_u64(__s0)); \ __ret; \ }) #define vcvth_f16_s64(__p0) __extension__ ({ \ float16_t __ret; \ int64_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__s0); \ + __ret = __builtin_bit_cast(float16_t, 
__builtin_neon_vcvth_f16_s64(__s0)); \ __ret; \ }) #define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint32_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_u32(__s0, __p1)); \ __ret; \ }) #define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int32_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_s32(__s0, __p1)); \ __ret; \ }) #define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_u64(__s0, __p1)); \ __ret; \ }) #define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int64_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_s64(__s0, __p1)); \ __ret; \ }) #define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ uint16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_u16(__s0, __p1)); \ __ret; \ }) #define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ int16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vcvth_n_f16_s16(__s0, __p1)); \ __ret; \ }) #define vcvtmh_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtmh_s16_f16(__s0)); \ __ret; \ }) #define vcvtmh_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtmh_s32_f16(__s0)); \ __ret; \ }) #define vcvtmh_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtmh_s64_f16(__s0)); \ __ret; \ }) #define vcvtmh_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtmh_u16_f16(__s0)); \ __ret; \ }) #define vcvtmh_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtmh_u32_f16(__s0)); \ __ret; \ }) #define vcvtmh_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtmh_u64_f16(__s0)); \ __ret; \ }) #define vcvtnh_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtnh_s16_f16(__s0)); \ __ret; \ }) #define vcvtnh_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtnh_s32_f16(__s0)); \ __ret; 
\ }) #define vcvtnh_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtnh_s64_f16(__s0)); \ __ret; \ }) #define vcvtnh_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtnh_u16_f16(__s0)); \ __ret; \ }) #define vcvtnh_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtnh_u32_f16(__s0)); \ __ret; \ }) #define vcvtnh_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtnh_u64_f16(__s0)); \ __ret; \ }) #define vcvtph_s16_f16(__p0) __extension__ ({ \ int16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vcvtph_s16_f16(__s0)); \ __ret; \ }) #define vcvtph_s32_f16(__p0) __extension__ ({ \ int32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtph_s32_f16(__s0)); \ __ret; \ }) #define vcvtph_s64_f16(__p0) __extension__ ({ \ int64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtph_s64_f16(__s0)); \ __ret; \ }) #define vcvtph_u16_f16(__p0) __extension__ ({ \ uint16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vcvtph_u16_f16(__s0)); \ __ret; \ }) #define vcvtph_u32_f16(__p0) __extension__ ({ \ uint32_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtph_u32_f16(__s0)); \ __ret; \ }) #define vcvtph_u64_f16(__p0) __extension__ ({ \ uint64_t __ret; \ float16_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtph_u64_f16(__s0)); \ __ret; \ }) #define vdivh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vdivh_f16(__s0, __s1)); \ __ret; \ }) #define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -443,7 +443,7 @@ typedef __fp16 float16_t; float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16_t __s2 = __p2; \ - __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_f16(__s0, __s1, __s2)); \ __ret; \ }) #define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \ @@ -451,142 +451,142 @@ typedef __fp16 float16_t; float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16_t __s2 = __p2; \ - __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmsh_f16(__s0, __s1, __s2)); \ __ret; \ }) #define vmaxh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); 
\ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxh_f16(__s0, __s1)); \ __ret; \ }) #define vmaxnmh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxnmh_f16(__s0, __s1)); \ __ret; \ }) #define vminh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminh_f16(__s0, __s1)); \ __ret; \ }) #define vminnmh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminnmh_f16(__s0, __s1)); \ __ret; \ }) #define vmulh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulh_f16(__s0, __s1)); \ __ret; \ }) #define vmulxh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulxh_f16(__s0, __s1)); \ __ret; \ }) #define vnegh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vnegh_f16(__s0)); \ __ret; \ }) #define vrecpeh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrecpeh_f16(__s0)); \ __ret; \ }) #define vrecpsh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrecpsh_f16(__s0, __s1)); \ __ret; \ }) #define vrecpxh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrecpxh_f16(__s0)); \ __ret; \ }) #define vrndh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndh_f16(__s0)); \ __ret; \ }) #define vrndah_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndah_f16(__s0)); \ __ret; \ }) #define vrndih_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndih_f16(__s0)); \ __ret; \ }) #define vrndmh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndmh_f16(__s0)); \ __ret; \ }) #define vrndnh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \ + __ret = 
__builtin_bit_cast(float16_t, __builtin_neon_vrndnh_f16(__s0)); \ __ret; \ }) #define vrndph_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndph_f16(__s0)); \ __ret; \ }) #define vrndxh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrndxh_f16(__s0)); \ __ret; \ }) #define vrsqrteh_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrsqrteh_f16(__s0)); \ __ret; \ }) #define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vrsqrtsh_f16(__s0, __s1)); \ __ret; \ }) #define vsqrth_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vsqrth_f16(__s0)); \ __ret; \ }) #define vsubh_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vsubh_f16(__s0, __s1)); \ __ret; \ }) #endif diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h index ab28e839e4..476158a2cb 100644 --- a/lib/include/arm_neon.h +++ b/lib/include/arm_neon.h @@ -24,7 +24,9 @@ #ifndef __ARM_NEON_H #define __ARM_NEON_H -#ifndef __ARM_FP +#if !defined(__arm__) && !defined(__aarch64__) && !defined(__arm64ec__) +#error "<arm_neon.h> is intended only for ARM and AArch64 targets" +#elif !defined(__ARM_FP) #error "NEON intrinsics not available with the soft-float ABI.
Please use -mfloat-abi=softfp or -mfloat-abi=hard" #else @@ -123,26 +125,45 @@ typedef struct poly64x2x4_t { #define __ai static __inline__ __attribute__((__always_inline__, __nodebug__)) +#if !defined(__LITTLE_ENDIAN__) +#if defined(__aarch64__) || defined(__arm64ec__) +#define __lane_reverse_64_32 1,0 +#define __lane_reverse_64_16 3,2,1,0 +#define __lane_reverse_64_8 7,6,5,4,3,2,1,0 +#define __lane_reverse_128_64 1,0 +#define __lane_reverse_128_32 3,2,1,0 +#define __lane_reverse_128_16 7,6,5,4,3,2,1,0 +#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +#else +#define __lane_reverse_64_32 1,0 +#define __lane_reverse_64_16 3,2,1,0 +#define __lane_reverse_64_8 7,6,5,4,3,2,1,0 +#define __lane_reverse_128_64 0,1 +#define __lane_reverse_128_32 1,0,3,2 +#define __lane_reverse_128_16 3,2,1,0,7,6,5,4 +#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8 +#endif +#endif #ifdef __LITTLE_ENDIAN__ #define splatq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ - __ret = (bfloat16x8_t) __builtin_neon_splatq_lane_bf16((int8x8_t)__s0, __p1, 11); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_splatq_lane_bf16(__builtin_bit_cast(int8x8_t, __s0), __p1, 11)); \ __ret; \ }) #else #define splatq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ - bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_splatq_lane_bf16((int8x8_t)__rev0, __p1, 11); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_splatq_lane_bf16(__builtin_bit_cast(int8x8_t, __rev0), __p1, 11)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ - __ret = (bfloat16x8_t) __builtin_neon_splatq_lane_bf16((int8x8_t)__s0, __p1, 11); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_splatq_lane_bf16(__builtin_bit_cast(int8x8_t, __s0), __p1, 11)); \ __ret; \ }) #endif @@ -151,22 +172,22 @@ typedef struct poly64x2x4_t { #define splat_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s0 = __p0; \ - __ret = (bfloat16x4_t) __builtin_neon_splat_lane_bf16((int8x8_t)__s0, __p1, 11); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_splat_lane_bf16(__builtin_bit_cast(int8x8_t, __s0), __p1, 11)); \ __ret; \ }) #else #define splat_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s0 = __p0; \ - bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (bfloat16x4_t) __builtin_neon_splat_lane_bf16((int8x8_t)__rev0, __p1, 11); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_splat_lane_bf16(__builtin_bit_cast(int8x8_t, __rev0), __p1, 11)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s0 = __p0; \ - __ret = (bfloat16x4_t) __builtin_neon_splat_lane_bf16((int8x8_t)__s0, __p1, 11); \ + __ret = __builtin_bit_cast(bfloat16x4_t, 
__builtin_neon_splat_lane_bf16(__builtin_bit_cast(int8x8_t, __s0), __p1, 11)); \ __ret; \ }) #endif @@ -175,22 +196,22 @@ typedef struct poly64x2x4_t { #define splatq_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ - __ret = (bfloat16x8_t) __builtin_neon_splatq_laneq_bf16((int8x16_t)__s0, __p1, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_splatq_laneq_bf16(__builtin_bit_cast(int8x16_t, __s0), __p1, 43)); \ __ret; \ }) #else #define splatq_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_splatq_laneq_bf16((int8x16_t)__rev0, __p1, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_splatq_laneq_bf16(__builtin_bit_cast(int8x16_t, __rev0), __p1, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ - __ret = (bfloat16x8_t) __builtin_neon_splatq_laneq_bf16((int8x16_t)__s0, __p1, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_splatq_laneq_bf16(__builtin_bit_cast(int8x16_t, __s0), __p1, 43)); \ __ret; \ }) #endif @@ -199,22 +220,22 @@ typedef struct poly64x2x4_t { #define splat_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x8_t __s0 = __p0; \ - __ret = (bfloat16x4_t) __builtin_neon_splat_laneq_bf16((int8x16_t)__s0, __p1, 43); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_splat_laneq_bf16(__builtin_bit_cast(int8x16_t, __s0), __p1, 43)); \ __ret; \ }) #else #define splat_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x8_t __s0 = __p0; \ - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x4_t) __builtin_neon_splat_laneq_bf16((int8x16_t)__rev0, __p1, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_splat_laneq_bf16(__builtin_bit_cast(int8x16_t, __rev0), __p1, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x8_t __s0 = __p0; \ - __ret = (bfloat16x4_t) __builtin_neon_splat_laneq_bf16((int8x16_t)__s0, __p1, 43); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_splat_laneq_bf16(__builtin_bit_cast(int8x16_t, __s0), __p1, 43)); \ __ret; \ }) #endif @@ -222,22 +243,22 @@ typedef struct poly64x2x4_t { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) float32x4_t vbfdotq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbfdotq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfdotq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) float32x4_t vbfdotq_f32(float32x4_t __p0, bfloat16x8_t __p1, 
bfloat16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vbfdotq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfdotq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vbfdotq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbfdotq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfdotq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -245,22 +266,22 @@ __ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vbfdotq_f32(float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) float32x2_t vbfdot_f32(float32x2_t __p0, bfloat16x4_t __p1, bfloat16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vbfdot_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vbfdot_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) float32x2_t vbfdot_f32(float32x2_t __p0, bfloat16x4_t __p1, bfloat16x4_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - bfloat16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vbfdot_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + bfloat16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vbfdot_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x2_t __noswap_vbfdot_f32(float32x2_t __p0, bfloat16x4_t __p1, bfloat16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vbfdot_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vbfdot_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, 
__p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -268,22 +289,22 @@ __ai __attribute__((target("bf16,neon"))) float32x2_t __noswap_vbfdot_f32(float3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) float32x4_t vbfmlalbq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbfmlalbq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmlalbq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) float32x4_t vbfmlalbq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vbfmlalbq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmlalbq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vbfmlalbq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbfmlalbq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmlalbq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -291,22 +312,22 @@ __ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vbfmlalbq_f32(flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) float32x4_t vbfmlaltq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbfmlaltq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmlaltq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) float32x4_t vbfmlaltq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vbfmlaltq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + 
bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmlaltq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vbfmlaltq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbfmlaltq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmlaltq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -314,17 +335,17 @@ __ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vbfmlaltq_f32(flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) float32x4_t vbfmmlaq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbfmmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmmlaq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) float32x4_t vbfmmlaq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vbfmmlaq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + bfloat16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbfmmlaq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -338,10 +359,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcombine_bf16(bfloat16x4_ #else __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcombine_bf16(bfloat16x4_t __p0, bfloat16x4_t __p1) { bfloat16x8_t __ret; - bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t 
__noswap_vcombine_bf16(bfloat16x4_t __p0, bfloat16x4_t __p1) { @@ -354,7 +375,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t __noswap_vcombine_bf16(bf #define vcreate_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ uint64_t __promote = __p0; \ - __ret = (bfloat16x4_t)(__promote); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __promote); \ __ret; \ }) __ai __attribute__((target("bf16,neon"))) float32_t vcvtah_f32_bf16(bfloat16_t __p0) { @@ -364,22 +385,22 @@ __ai __attribute__((target("bf16,neon"))) float32_t vcvtah_f32_bf16(bfloat16_t _ } __ai __attribute__((target("bf16,neon"))) bfloat16_t vcvth_bf16_f32(float32_t __p0) { bfloat16_t __ret; - __ret = (bfloat16_t) __builtin_neon_vcvth_bf16_f32(__p0); + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vcvth_bf16_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ #define vduph_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ - __ret = (bfloat16_t) __builtin_neon_vduph_lane_bf16((bfloat16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vduph_lane_bf16(__s0, __p1)); \ __ret; \ }) #else #define vduph_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ - bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (bfloat16_t) __builtin_neon_vduph_lane_bf16((bfloat16x4_t)__rev0, __p1); \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vduph_lane_bf16(__rev0, __p1)); \ __ret; \ }) #endif @@ -395,9 +416,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16_t vcvth_bf16_f32(float32_t __ #define vdupq_lane_bf16(__p0_1, __p1_1) __extension__ ({ \ bfloat16x8_t __ret_1; \ bfloat16x4_t __s0_1 = __p0_1; \ - bfloat16x4_t __rev0_1; __rev0_1 = __builtin_shufflevector(__s0_1, __s0_1, 3, 2, 1, 0); \ + bfloat16x4_t __rev0_1; __rev0_1 = __builtin_shufflevector(__s0_1, __s0_1, __lane_reverse_64_16); \ __ret_1 = __noswap_splatq_lane_bf16(__rev0_1, __p1_1); \ - __ret_1 = __builtin_shufflevector(__ret_1, __ret_1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_1 = __builtin_shufflevector(__ret_1, __ret_1, __lane_reverse_128_16); \ __ret_1; \ }) #endif @@ -413,9 +434,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16_t vcvth_bf16_f32(float32_t __ #define vdup_lane_bf16(__p0_3, __p1_3) __extension__ ({ \ bfloat16x4_t __ret_3; \ bfloat16x4_t __s0_3 = __p0_3; \ - bfloat16x4_t __rev0_3; __rev0_3 = __builtin_shufflevector(__s0_3, __s0_3, 3, 2, 1, 0); \ + bfloat16x4_t __rev0_3; __rev0_3 = __builtin_shufflevector(__s0_3, __s0_3, __lane_reverse_64_16); \ __ret_3 = __noswap_splat_lane_bf16(__rev0_3, __p1_3); \ - __ret_3 = __builtin_shufflevector(__ret_3, __ret_3, 3, 2, 1, 0); \ + __ret_3 = __builtin_shufflevector(__ret_3, __ret_3, __lane_reverse_64_16); \ __ret_3; \ }) #endif @@ -424,15 +445,15 @@ __ai __attribute__((target("bf16,neon"))) bfloat16_t vcvth_bf16_f32(float32_t __ #define vduph_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ - __ret = (bfloat16_t) __builtin_neon_vduph_laneq_bf16((bfloat16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vduph_laneq_bf16(__s0, __p1)); \ __ret; \ }) #else #define vduph_laneq_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16_t) 
__builtin_neon_vduph_laneq_bf16((bfloat16x8_t)__rev0, __p1); \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vduph_laneq_bf16(__rev0, __p1)); \ __ret; \ }) #endif @@ -448,9 +469,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16_t vcvth_bf16_f32(float32_t __ #define vdupq_laneq_bf16(__p0_5, __p1_5) __extension__ ({ \ bfloat16x8_t __ret_5; \ bfloat16x8_t __s0_5 = __p0_5; \ - bfloat16x8_t __rev0_5; __rev0_5 = __builtin_shufflevector(__s0_5, __s0_5, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev0_5; __rev0_5 = __builtin_shufflevector(__s0_5, __s0_5, __lane_reverse_128_16); \ __ret_5 = __noswap_splatq_laneq_bf16(__rev0_5, __p1_5); \ - __ret_5 = __builtin_shufflevector(__ret_5, __ret_5, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_5 = __builtin_shufflevector(__ret_5, __ret_5, __lane_reverse_128_16); \ __ret_5; \ }) #endif @@ -466,9 +487,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16_t vcvth_bf16_f32(float32_t __ #define vdup_laneq_bf16(__p0_7, __p1_7) __extension__ ({ \ bfloat16x4_t __ret_7; \ bfloat16x8_t __s0_7 = __p0_7; \ - bfloat16x8_t __rev0_7; __rev0_7 = __builtin_shufflevector(__s0_7, __s0_7, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev0_7; __rev0_7 = __builtin_shufflevector(__s0_7, __s0_7, __lane_reverse_128_16); \ __ret_7 = __noswap_splat_laneq_bf16(__rev0_7, __p1_7); \ - __ret_7 = __builtin_shufflevector(__ret_7, __ret_7, 3, 2, 1, 0); \ + __ret_7 = __builtin_shufflevector(__ret_7, __ret_7, __lane_reverse_64_16); \ __ret_7; \ }) #endif @@ -483,7 +504,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vdupq_n_bf16(bfloat16_t _ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vdupq_n_bf16(bfloat16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -498,7 +519,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vdup_n_bf16(bfloat16_t __ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vdup_n_bf16(bfloat16_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -512,9 +533,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vget_high_bf16(bfloat16x8 #else __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vget_high_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_high_bf16(bfloat16x8_t __p0) { @@ -528,21 +549,21 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_high_bf16(b #define vgetq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ - __ret = (bfloat16_t) __builtin_neon_vgetq_lane_bf16((bfloat16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vgetq_lane_bf16(__s0, __p1)); \ __ret; \ }) #else #define 
vgetq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16_t) __builtin_neon_vgetq_lane_bf16((bfloat16x8_t)__rev0, __p1); \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vgetq_lane_bf16(__rev0, __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x8_t __s0 = __p0; \ - __ret = (bfloat16_t) __builtin_neon_vgetq_lane_bf16((bfloat16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vgetq_lane_bf16(__s0, __p1)); \ __ret; \ }) #endif @@ -551,21 +572,21 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_high_bf16(b #define vget_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ - __ret = (bfloat16_t) __builtin_neon_vget_lane_bf16((bfloat16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vget_lane_bf16(__s0, __p1)); \ __ret; \ }) #else #define vget_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ - bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (bfloat16_t) __builtin_neon_vget_lane_bf16((bfloat16x4_t)__rev0, __p1); \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vget_lane_bf16(__rev0, __p1)); \ __ret; \ }) #define __noswap_vget_lane_bf16(__p0, __p1) __extension__ ({ \ bfloat16_t __ret; \ bfloat16x4_t __s0 = __p0; \ - __ret = (bfloat16_t) __builtin_neon_vget_lane_bf16((bfloat16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(bfloat16_t, __builtin_neon_vget_lane_bf16(__s0, __p1)); \ __ret; \ }) #endif @@ -579,9 +600,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vget_low_bf16(bfloat16x8_ #else __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vget_low_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bfloat16x8_t __p0) { @@ -594,14 +615,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #ifdef __LITTLE_ENDIAN__ #define vld1q_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ - __ret = (bfloat16x8_t) __builtin_neon_vld1q_bf16(__p0, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vld1q_bf16(__p0, 43)); \ __ret; \ }) #else #define vld1q_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ - __ret = (bfloat16x8_t) __builtin_neon_vld1q_bf16(__p0, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vld1q_bf16(__p0, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -609,14 +630,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #ifdef __LITTLE_ENDIAN__ #define vld1_bf16(__p0) __extension__ ({ \ 
bfloat16x4_t __ret; \ - __ret = (bfloat16x4_t) __builtin_neon_vld1_bf16(__p0, 11); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vld1_bf16(__p0, 11)); \ __ret; \ }) #else #define vld1_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ - __ret = (bfloat16x4_t) __builtin_neon_vld1_bf16(__p0, 11); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vld1_bf16(__p0, 11)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -624,14 +645,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ - __ret = (bfloat16x8_t) __builtin_neon_vld1q_dup_bf16(__p0, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vld1q_dup_bf16(__p0, 43)); \ __ret; \ }) #else #define vld1q_dup_bf16(__p0) __extension__ ({ \ bfloat16x8_t __ret; \ - __ret = (bfloat16x8_t) __builtin_neon_vld1q_dup_bf16(__p0, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vld1q_dup_bf16(__p0, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -639,14 +660,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #ifdef __LITTLE_ENDIAN__ #define vld1_dup_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ - __ret = (bfloat16x4_t) __builtin_neon_vld1_dup_bf16(__p0, 11); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vld1_dup_bf16(__p0, 11)); \ __ret; \ }) #else #define vld1_dup_bf16(__p0) __extension__ ({ \ bfloat16x4_t __ret; \ - __ret = (bfloat16x4_t) __builtin_neon_vld1_dup_bf16(__p0, 11); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vld1_dup_bf16(__p0, 11)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -655,16 +676,16 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vld1q_lane_bf16(__p0, (int8x16_t)__s1, __p2, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vld1q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 43)); \ __ret; \ }) #else #define vld1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16x8_t __s1 = __p1; \ - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vld1q_lane_bf16(__p0, (int8x16_t)__rev1, __p2, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vld1q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -673,16 +694,16 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s1 = __p1; \ - __ret = (bfloat16x4_t) __builtin_neon_vld1_lane_bf16(__p0, (int8x8_t)__s1, __p2, 11); \ + __ret = __builtin_bit_cast(bfloat16x4_t, 
__builtin_neon_vld1_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 11)); \ __ret; \ }) #else #define vld1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16x4_t __s1 = __p1; \ - bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (bfloat16x4_t) __builtin_neon_vld1_lane_bf16(__p0, (int8x8_t)__rev1, __p2, 11); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vld1_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 11)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -698,8 +719,8 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x2_t __ret; \ __builtin_neon_vld1q_bf16_x2(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -715,8 +736,8 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x2_t __ret; \ __builtin_neon_vld1_bf16_x2(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -732,9 +753,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x3_t __ret; \ __builtin_neon_vld1q_bf16_x3(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -750,9 +771,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x3_t __ret; \ __builtin_neon_vld1_bf16_x3(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -768,10 +789,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x4_t __ret; \ 
__builtin_neon_vld1q_bf16_x4(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -787,10 +808,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x4_t __ret; \ __builtin_neon_vld1_bf16_x4(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -806,8 +827,8 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x2_t __ret; \ __builtin_neon_vld2q_bf16(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -823,8 +844,8 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x2_t __ret; \ __builtin_neon_vld2_bf16(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -840,8 +861,8 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x2_t __ret; \ __builtin_neon_vld2q_dup_bf16(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -857,8 +878,8 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf 
bfloat16x4x2_t __ret; \ __builtin_neon_vld2_dup_bf16(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -867,7 +888,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x2_t __ret; \ bfloat16x8x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_bf16(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 43); \ + __builtin_neon_vld2q_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 43); \ __ret; \ }) #else @@ -875,12 +896,12 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x2_t __ret; \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_bf16(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vld2q_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -889,7 +910,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x2_t __ret; \ bfloat16x4x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_bf16(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 11); \ + __builtin_neon_vld2_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 11); \ __ret; \ }) #else @@ -897,12 +918,12 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x2_t __ret; \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2_lane_bf16(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vld2_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -918,9 +939,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x3_t __ret; \ __builtin_neon_vld3q_bf16(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -936,9 +957,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x3_t __ret; \ __builtin_neon_vld3_bf16(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -954,9 +975,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x3_t __ret; \ __builtin_neon_vld3q_dup_bf16(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -972,9 +993,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x3_t __ret; \ __builtin_neon_vld3_dup_bf16(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -983,7 +1004,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld3q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x3_t __ret; \ bfloat16x8x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_bf16(&__ret, __p0, (int8x16_t)__s1.val[0], 
(int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 43); \ + __builtin_neon_vld3q_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 43); \ __ret; \ }) #else @@ -991,14 +1012,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x3_t __ret; \ bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_bf16(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vld3q_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -1007,7 +1028,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld3_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x3_t __ret; \ bfloat16x4x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_bf16(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 11); \ + __builtin_neon_vld3_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 11); \ __ret; \ }) #else @@ -1015,14 +1036,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x3_t __ret; \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_bf16(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vld3_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 11); \ \ - 
__ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -1038,10 +1059,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x4_t __ret; \ __builtin_neon_vld4q_bf16(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -1057,10 +1078,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x4_t __ret; \ __builtin_neon_vld4_bf16(&__ret, __p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -1076,10 +1097,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x4_t __ret; \ __builtin_neon_vld4q_dup_bf16(&__ret, __p0, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -1095,10 +1116,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x4_t __ret; \ __builtin_neon_vld4_dup_bf16(&__ret, 
__p0, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -1107,7 +1128,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld4q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x4_t __ret; \ bfloat16x8x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_bf16(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 43); \ + __builtin_neon_vld4q_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 43); \ __ret; \ }) #else @@ -1115,16 +1136,16 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8x4_t __ret; \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_bf16(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vld4q_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 43); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -1133,7 +1154,7 @@ __ai 
__attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #define vld4_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x4_t __ret; \ bfloat16x4x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_bf16(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 11); \ + __builtin_neon_vld4_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 11); \ __ret; \ }) #else @@ -1141,16 +1162,16 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4x4_t __ret; \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_bf16(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vld4_lane_bf16(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 11); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -1160,7 +1181,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x8_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vsetq_lane_bf16(__s0, (bfloat16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vsetq_lane_bf16(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -1168,16 +1189,16 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x8_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x8_t __s1 = __p1; \ - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vsetq_lane_bf16(__s0, (bfloat16x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = 
__builtin_bit_cast(bfloat16x8_t, __builtin_neon_vsetq_lane_bf16(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vsetq_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x8_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vsetq_lane_bf16(__s0, (bfloat16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vsetq_lane_bf16(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -1187,7 +1208,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x4_t __s1 = __p1; \ - __ret = (bfloat16x4_t) __builtin_neon_vset_lane_bf16(__s0, (bfloat16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vset_lane_bf16(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -1195,16 +1216,16 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf bfloat16x4_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x4_t __s1 = __p1; \ - bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (bfloat16x4_t) __builtin_neon_vset_lane_bf16(__s0, (bfloat16x4_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vset_lane_bf16(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vset_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __ret; \ bfloat16_t __s0 = __p0; \ bfloat16x4_t __s1 = __p1; \ - __ret = (bfloat16x4_t) __builtin_neon_vset_lane_bf16(__s0, (bfloat16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vset_lane_bf16(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -1212,362 +1233,362 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap_vget_low_bf16(bf #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_bf16(__p0, (int8x16_t)__s1, 43); \ + __builtin_neon_vst1q_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1), 43); \ }) #else #define vst1q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8_t __s1 = __p1; \ - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_bf16(__p0, (int8x16_t)__rev1, 43); \ + bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1), 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ - __builtin_neon_vst1_bf16(__p0, (int8x8_t)__s1, 11); \ + __builtin_neon_vst1_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1), 11); \ }) #else #define vst1_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ - bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_bf16(__p0, (int8x8_t)__rev1, 11); \ + bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1), 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_bf16(__p0, (int8x16_t)__s1, __p2, 43); \ + 
__builtin_neon_vst1q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 43); \ }) #else #define vst1q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8_t __s1 = __p1; \ - bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_bf16(__p0, (int8x16_t)__rev1, __p2, 43); \ + bfloat16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_bf16(__p0, (int8x8_t)__s1, __p2, 11); \ + __builtin_neon_vst1_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 11); \ }) #else #define vst1_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4_t __s1 = __p1; \ - bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_bf16(__p0, (int8x8_t)__rev1, __p2, 11); \ + bfloat16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_bf16_x2(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 43); \ + __builtin_neon_vst1q_bf16_x2(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 43); \ }) #else #define vst1q_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_bf16_x2(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst1q_bf16_x2(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_bf16_x2(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 11); \ + __builtin_neon_vst1_bf16_x2(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 11); \ }) #else #define vst1_bf16_x2(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_bf16_x2(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst1_bf16_x2(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_bf16_x3(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 
(int8x16_t)__s1.val[2], 43); \ + __builtin_neon_vst1q_bf16_x3(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 43); \ }) #else #define vst1q_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_bf16_x3(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst1q_bf16_x3(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_bf16_x3(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 11); \ + __builtin_neon_vst1_bf16_x3(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 11); \ }) #else #define vst1_bf16_x3(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_bf16_x3(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst1_bf16_x3(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ - __builtin_neon_vst1q_bf16_x4(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 43); \ + __builtin_neon_vst1q_bf16_x4(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 43); \ }) #else #define vst1q_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_bf16_x4(__p0, 
(int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst1q_bf16_x4(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ - __builtin_neon_vst1_bf16_x4(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 11); \ + __builtin_neon_vst1_bf16_x4(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 11); \ }) #else #define vst1_bf16_x4(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1_bf16_x4(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst1_bf16_x4(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 43); \ + __builtin_neon_vst2q_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 43); \ }) #else #define vst2q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ - 
__builtin_neon_vst2_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 11); \ + __builtin_neon_vst2_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 11); \ }) #else #define vst2_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 43); \ + __builtin_neon_vst2q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 43); \ }) #else #define vst2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x2_t __s1 = __p1; \ bfloat16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 11); \ + __builtin_neon_vst2_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 11); \ }) #else #define vst2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x2_t __s1 = __p1; \ bfloat16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 43); \ + __builtin_neon_vst3q_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 43); \ }) #else #define vst3q_bf16(__p0, __p1) __extension__ ({ \ 
bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 11); \ + __builtin_neon_vst3_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 11); \ }) #else #define vst3_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 43); \ + __builtin_neon_vst3q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 43); \ }) #else #define vst3q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x3_t __s1 = __p1; \ bfloat16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_lane_bf16(__p0, 
__builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 11); \ + __builtin_neon_vst3_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 11); \ }) #else #define vst3_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x3_t __s1 = __p1; \ bfloat16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 43); \ + __builtin_neon_vst4q_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 43); \ }) #else #define vst4q_bf16(__p0, __p1) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 11); \ + __builtin_neon_vst4_bf16(__p0, 
__builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 11); \ }) #else #define vst4_bf16(__p0, __p1) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_bf16(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 43); \ + __builtin_neon_vst4q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 43); \ }) #else #define vst4q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x8x4_t __s1 = __p1; \ bfloat16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_bf16(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 43); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_lane_bf16(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 43); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_bf16(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 11); \ + __builtin_neon_vst4_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, 
__s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 11); \ }) #else #define vst4_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ bfloat16x4x4_t __s1 = __p1; \ bfloat16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_bf16(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 11); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_lane_bf16(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 11); \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("dotprod,neon"))) uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vdotq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vdotq_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("dotprod,neon"))) uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vdotq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vdotq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("dotprod,neon"))) uint32x4_t __noswap_vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vdotq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vdotq_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #endif @@ -1575,22 +1596,22 @@ __ai __attribute__((target("dotprod,neon"))) uint32x4_t __noswap_vdotq_u32(uint3 #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("dotprod,neon"))) int32x4_t vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vdotq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("dotprod,neon"))) int32x4_t vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vdotq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vdotq_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("dotprod,neon"))) int32x4_t __noswap_vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vdotq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #endif @@ -1598,22 +1619,22 @@ __ai __attribute__((target("dotprod,neon"))) int32x4_t __noswap_vdotq_s32(int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("dotprod,neon"))) uint32x2_t vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vdot_u32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vdot_u32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 18)); return __ret; } #else __ai __attribute__((target("dotprod,neon"))) uint32x2_t vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vdot_u32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vdot_u32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, 
__rev1), __builtin_bit_cast(int8x8_t, __rev2), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("dotprod,neon"))) uint32x2_t __noswap_vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vdot_u32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vdot_u32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 18)); return __ret; } #endif @@ -1621,22 +1642,22 @@ __ai __attribute__((target("dotprod,neon"))) uint32x2_t __noswap_vdot_u32(uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("dotprod,neon"))) int32x2_t vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vdot_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #else __ai __attribute__((target("dotprod,neon"))) int32x2_t vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x2_t) __builtin_neon_vdot_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vdot_s32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("dotprod,neon"))) int32x2_t __noswap_vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vdot_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #endif @@ -1644,16 +1665,16 @@ __ai __attribute__((target("dotprod,neon"))) int32x2_t __noswap_vdot_s32(int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vabdq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vabdq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vabdq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 
40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vabdq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1661,16 +1682,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vabdq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vabd_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vabd_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vabd_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vabd_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1678,15 +1699,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vabd_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vabsq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vabsq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vabsq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vabsq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vabsq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vabsq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1694,15 +1715,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vabsq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vabs_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vabs_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vabs_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vabs_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 
2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vabs_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vabs_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1716,10 +1737,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vaddq_f16(float16x8_t #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1733,10 +1754,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vadd_f16(float16x4_t _ #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1744,16 +1765,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vadd_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcageq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcageq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcageq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcageq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1761,16 +1782,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcageq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("fullfp16,neon"))) uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcage_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcage_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcage_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcage_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1778,16 +1799,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcage_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcagtq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcagtq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcagtq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcagtq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1795,16 +1816,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcagtq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcagt_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcagt_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcagt_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, 
__ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcagt_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1812,16 +1833,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcagt_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcaleq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcaleq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcaleq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcaleq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1829,16 +1850,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcaleq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcale_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcale_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcale_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcale_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1846,16 +1867,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcale_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcaltq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); 
+ __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcaltq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcaltq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcaltq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1863,16 +1884,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcaltq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcalt_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcalt_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcalt_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcalt_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1880,16 +1901,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcalt_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } 
#endif @@ -1897,16 +1918,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vceqq_f16(float16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1914,15 +1935,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vceq_f16(float16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vceqzq_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vceqzq_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vceqzq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vceqzq_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vceqzq_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vceqzq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1930,15 +1951,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vceqzq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vceqz_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vceqz_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vceqz_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vceqz_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vceqz_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vceqz_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1946,16 +1967,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vceqz_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 
>= __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1963,16 +1984,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgeq_f16(float16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -1980,15 +2001,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcge_f16(float16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgezq_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcgezq_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcgezq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgezq_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcgezq_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcgezq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -1996,15 +2017,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgezq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgez_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgez_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgez_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgez_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcgez_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgez_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2012,16 +2033,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgez_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2029,16 +2050,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgtq_f16(float16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2046,15 +2067,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgt_f16(float16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgtzq_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcgtzq_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcgtzq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgtzq_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcgtzq_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); 
+ __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcgtzq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2062,15 +2083,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcgtzq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgtz_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgtz_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgtz_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgtz_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcgtz_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgtz_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2078,16 +2099,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcgtz_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2095,16 +2116,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcleq_f16(float16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2112,15 +2133,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t 
vcle_f16(float16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vclezq_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vclezq_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vclezq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vclezq_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vclezq_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vclezq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2128,15 +2149,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vclezq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vclez_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vclez_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vclez_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vclez_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vclez_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vclez_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2144,16 +2165,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vclez_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2161,16 +2182,16 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcltq_f16(float16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 < __p1); return __ret; } #else __ai 
__attribute__((target("fullfp16,neon"))) uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2178,15 +2199,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vclt_f16(float16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcltzq_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcltzq_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcltzq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcltzq_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcltzq_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcltzq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2194,15 +2215,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcltzq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcltz_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcltz_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcltz_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcltz_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcltz_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcltz_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2210,15 +2231,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcltz_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vcvtq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvtq_f16_u16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_f16_u16(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vcvtq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) 
__builtin_neon_vcvtq_f16_u16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_f16_u16(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2226,15 +2247,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vcvtq_f16_u16(uint16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vcvtq_f16_s16(int16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvtq_f16_s16((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_f16_s16(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vcvtq_f16_s16(int16x8_t __p0) { float16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcvtq_f16_s16((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_f16_s16(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2242,15 +2263,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vcvtq_f16_s16(int16x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_u16(uint16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcvt_f16_u16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_f16_u16(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_u16(uint16x4_t __p0) { float16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcvt_f16_u16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_f16_u16(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2258,15 +2279,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_u16(uint16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcvt_f16_s16((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_f16_s16(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t __p0) { float16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcvt_f16_s16((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_f16_s16(__builtin_bit_cast(int8x8_t, __rev0), 
1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2275,16 +2296,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvtq_n_f16_u16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_u16((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_n_f16_u16(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vcvtq_n_f16_u16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_u16((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_n_f16_u16(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -2293,16 +2314,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_s16((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_n_f16_s16(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define vcvtq_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vcvtq_n_f16_s16((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvtq_n_f16_s16(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -2311,16 +2332,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_u16((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_n_f16_u16(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vcvt_n_f16_u16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_u16((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_n_f16_u16(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -2329,16 +2350,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ int16x4_t 
__s0 = __p0; \ - __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_s16((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_n_f16_s16(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vcvt_n_f16_s16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vcvt_n_f16_s16((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_n_f16_s16(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -2347,16 +2368,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_f16((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtq_n_s16_f16(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define vcvtq_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vcvtq_n_s16_f16((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtq_n_s16_f16(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -2365,16 +2386,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_f16((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvt_n_s16_f16(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vcvt_n_s16_f16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vcvt_n_s16_f16((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvt_n_s16_f16(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -2383,16 +2404,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_f16((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtq_n_u16_f16(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vcvtq_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ float16x8_t __s0 = __p0; \ - 
float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vcvtq_n_u16_f16((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtq_n_u16_f16(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -2401,16 +2422,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_f16((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvt_n_u16_f16(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vcvt_n_u16_f16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vcvt_n_u16_f16((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvt_n_u16_f16(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -2418,15 +2439,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vcvt_f16_s16(int16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vcvtq_s16_f16((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtq_s16_f16(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vcvtq_s16_f16((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtq_s16_f16(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2434,15 +2455,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtq_s16_f16(float16x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvt_s16_f16(float16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vcvt_s16_f16((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvt_s16_f16(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvt_s16_f16(float16x4_t __p0) { int16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vcvt_s16_f16((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvt_s16_f16(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2450,15 +2471,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvt_s16_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtq_u16_f16(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtq_u16_f16(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2466,15 +2487,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtq_u16_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvt_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcvt_u16_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvt_u16_f16(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvt_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcvt_u16_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvt_u16_f16(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2482,15 +2503,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvt_u16_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtaq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_f16((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtaq_s16_f16(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtaq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_f16((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtaq_s16_f16(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2498,15 +2519,15 @@ __ai 
__attribute__((target("fullfp16,neon"))) int16x8_t vcvtaq_s16_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvta_s16_f16(float16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vcvta_s16_f16((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvta_s16_f16(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvta_s16_f16(float16x4_t __p0) { int16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vcvta_s16_f16((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvta_s16_f16(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2514,15 +2535,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvta_s16_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtaq_u16_f16(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtaq_u16_f16(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2530,15 +2551,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtaq_u16_f16(float16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvta_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcvta_u16_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvta_u16_f16(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvta_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcvta_u16_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvta_u16_f16(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2546,15 +2567,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvta_u16_f16(float16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtmq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_f16((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, 
__builtin_neon_vcvtmq_s16_f16(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtmq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_f16((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtmq_s16_f16(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2562,15 +2583,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtmq_s16_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtm_s16_f16(float16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vcvtm_s16_f16((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvtm_s16_f16(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtm_s16_f16(float16x4_t __p0) { int16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vcvtm_s16_f16((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvtm_s16_f16(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2578,15 +2599,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtm_s16_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtmq_u16_f16(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtmq_u16_f16(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2594,15 +2615,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtmq_u16_f16(float16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtm_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvtm_u16_f16(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtm_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = 
(uint16x4_t) __builtin_neon_vcvtm_u16_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvtm_u16_f16(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2610,15 +2631,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtm_u16_f16(float16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtnq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_f16((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtnq_s16_f16(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtnq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_f16((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtnq_s16_f16(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2626,15 +2647,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtnq_s16_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtn_s16_f16(float16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vcvtn_s16_f16((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvtn_s16_f16(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtn_s16_f16(float16x4_t __p0) { int16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vcvtn_s16_f16((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvtn_s16_f16(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2642,15 +2663,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtn_s16_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtnq_u16_f16(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, 
__builtin_neon_vcvtnq_u16_f16(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2658,15 +2679,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtnq_u16_f16(float16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtn_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvtn_u16_f16(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtn_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvtn_u16_f16(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2674,15 +2695,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtn_u16_f16(float16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtpq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_f16((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtpq_s16_f16(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtpq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_f16((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vcvtpq_s16_f16(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2690,15 +2711,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x8_t vcvtpq_s16_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtp_s16_f16(float16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vcvtp_s16_f16((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvtp_s16_f16(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtp_s16_f16(float16x4_t __p0) { int16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vcvtp_s16_f16((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcvtp_s16_f16(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2706,15 +2727,15 @@ __ai __attribute__((target("fullfp16,neon"))) int16x4_t vcvtp_s16_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("fullfp16,neon"))) uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_f16((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtpq_u16_f16(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_f16((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcvtpq_u16_f16(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2722,15 +2743,15 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x8_t vcvtpq_u16_f16(float16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtp_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_f16((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvtp_u16_f16(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtp_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_f16((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcvtp_u16_f16(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2738,22 +2759,22 @@ __ai __attribute__((target("fullfp16,neon"))) uint16x4_t vcvtp_u16_f16(float16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vfmaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vfmaq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, 
__builtin_neon_vfmaq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("fullfp16,neon"))) float16x8_t __noswap_vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vfmaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #endif @@ -2761,22 +2782,22 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t __noswap_vfmaq_f16(flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vfma_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vfma_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("fullfp16,neon"))) float16x4_t __noswap_vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vfma_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #endif @@ -2790,11 +2811,11 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vfmsq_f16(float16x8_t #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; 
__rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vfmaq_f16(__rev0, -__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2808,11 +2829,11 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vfms_f16(float16x4_t _ #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __noswap_vfma_f16(__rev0, -__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2820,16 +2841,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vfms_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vmaxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmaxq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vmaxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmaxq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2837,16 +2858,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmaxq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vmax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmax_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vmax_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - 
__ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmax_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2854,16 +2875,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmax_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vminq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vminq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vminq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vminq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2871,16 +2892,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vminq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vmin_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmin_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vmin_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmin_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2894,10 +2915,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmulq_f16(float16x8_t #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 
6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2911,10 +2932,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmul_f16(float16x4_t _ #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -2932,9 +2953,9 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmul_f16(float16x4_t _ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16_t __s1 = __p1; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ __ret = __rev0 * (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}; \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -2952,9 +2973,9 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmul_f16(float16x4_t _ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16_t __s1 = __p1; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ __ret = __rev0 * (float16x4_t) {__s1, __s1, __s1, __s1}; \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -2968,9 +2989,9 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vnegq_f16(float16x8_t #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vnegq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -2984,9 +3005,9 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vneg_f16(float16x4_t _ #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vneg_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } 
#endif @@ -2994,16 +3015,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vneg_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vpadd_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpadd_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vpadd_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpadd_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3011,16 +3032,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpadd_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vpmax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpmax_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vpmax_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpmax_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3028,16 +3049,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmax_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vpmin_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpmin_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) 
__builtin_neon_vpmin_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpmin_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3045,15 +3066,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmin_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrecpeq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrecpeq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrecpeq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrecpeq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrecpeq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrecpeq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -3061,15 +3082,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrecpeq_f16(float16x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrecpe_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrecpe_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrecpe_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrecpe_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrecpe_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrecpe_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3077,16 +3098,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrecpe_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrecpsq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrecpsq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) 
__builtin_neon_vrecpsq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrecpsq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -3094,16 +3115,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrecpsq_f16(float16x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrecps_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrecps_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrecps_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrecps_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3111,15 +3132,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrecps_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrsqrteq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrsqrteq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrsqrteq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrsqrteq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrsqrteq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrsqrteq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -3127,15 +3148,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrsqrteq_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrsqrte_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrsqrte_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrsqrte_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t 
vrsqrte_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrsqrte_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrsqrte_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3143,16 +3164,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrsqrte_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrsqrtsq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrsqrtsq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrsqrtsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrsqrtsq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrsqrtsq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -3160,16 +3181,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrsqrtsq_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrsqrts_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrsqrts_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrsqrts_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrsqrts_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3183,10 +3204,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vsubq_f16(float16x8_t #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t 
vsubq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -3200,10 +3221,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vsub_f16(float16x4_t _ #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -3211,17 +3232,17 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vsub_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm,neon"))) uint32x4_t vmmlaq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmmlaq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmmlaq_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("i8mm,neon"))) uint32x4_t vmmlaq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vmmlaq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmmlaq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -3229,17 +3250,17 @@ __ai __attribute__((target("i8mm,neon"))) uint32x4_t vmmlaq_u32(uint32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm,neon"))) int32x4_t vmmlaq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmmlaq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, 
__builtin_neon_vmmlaq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("i8mm,neon"))) int32x4_t vmmlaq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vmmlaq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmmlaq_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -3247,22 +3268,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vmmlaq_s32(int32x4_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusdotq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vusdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vusdotq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("i8mm,neon"))) int32x4_t vusdotq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vusdotq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vusdotq_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("i8mm,neon"))) int32x4_t __noswap_vusdotq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vusdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vusdotq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #endif @@ -3270,22 +3291,22 @@ __ai 
__attribute__((target("i8mm,neon"))) int32x4_t __noswap_vusdotq_s32(int32x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm,neon"))) int32x2_t vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vusdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vusdot_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #else __ai __attribute__((target("i8mm,neon"))) int32x2_t vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x2_t) __builtin_neon_vusdot_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vusdot_s32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("i8mm,neon"))) int32x2_t __noswap_vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vusdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vusdot_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #endif @@ -3293,17 +3314,17 @@ __ai __attribute__((target("i8mm,neon"))) int32x2_t __noswap_vusdot_s32(int32x2_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vusmmlaq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vusmmlaq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vusmmlaq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int32x4_t, 
__builtin_neon_vusmmlaq_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -3312,22 +3333,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = (poly8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 4)); \ __ret; \ }) #else #define splat_lane_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_splat_lane_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = (poly8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 4)); \ __ret; \ }) #endif @@ -3335,29 +3356,29 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s0 = __p0; \ - __ret = (poly64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = (poly16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 5)); \ __ret; \ }) #else #define splat_lane_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (poly16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_lane_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = (poly16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 5)); \ __ret; \ }) #endif @@ -3366,22 +3387,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = (poly8x16_t) 
__builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 4); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 4)); \ __ret; \ }) #else #define splatq_lane_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_splatq_lane_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = (poly8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 4); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 4)); \ __ret; \ }) #endif @@ -3390,21 +3411,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x1_t __s0 = __p0; \ - __ret = (poly64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 6); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 6)); \ __ret; \ }) #else #define splatq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x1_t __s0 = __p0; \ - __ret = (poly64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 6); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 6)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_lane_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x1_t __s0 = __p0; \ - __ret = (poly64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 6); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 6)); \ __ret; \ }) #endif @@ -3413,22 +3434,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = (poly16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 5); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 5)); \ __ret; \ }) #else #define splatq_lane_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_lane_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x4_t __s0 = __p0; 
\ - __ret = (poly16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 5); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 5)); \ __ret; \ }) #endif @@ -3437,22 +3458,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #else #define splatq_lane_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_splatq_lane_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -3461,22 +3482,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define splatq_lane_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_splatq_lane_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -3485,21 +3506,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #else #define splatq_lane_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x1_t __s0 = __p0; \ - 
__ret = (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 19); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_lane_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #endif @@ -3508,22 +3529,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define splatq_lane_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_lane_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #endif @@ -3532,22 +3553,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #else #define splatq_lane_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_splatq_lane_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #endif @@ -3556,21 +3577,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t 
vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x1_t __s0 = __p0; \ - __ret = (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 10); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 10)); \ __ret; \ }) #else #define splatq_lane_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x1_t __s0 = __p0; \ - __ret = (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 10); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 10)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_lane_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x1_t __s0 = __p0; \ - __ret = (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 10); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 10)); \ __ret; \ }) #endif @@ -3579,22 +3600,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 9); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 9)); \ __ret; \ }) #else #define splatq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x2_t __s0 = __p0; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_splatq_lane_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 9); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 9)); \ __ret; \ }) #endif @@ -3603,22 +3624,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 8)); \ __ret; \ }) #else #define splatq_lane_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_lane_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 8)); \ __ret; \ }) #endif @@ -3627,22 +3648,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define splatq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_splatq_lane_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #endif @@ -3651,21 +3672,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #else #define splatq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 3); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_lane_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #endif @@ -3674,22 +3695,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_lane_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define splatq_lane_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x8_t) 
__builtin_neon_splatq_lane_v((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_lane_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #endif @@ -3698,22 +3719,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #else #define splat_lane_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_splat_lane_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -3722,22 +3743,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define splat_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_splat_lane_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -3745,29 +3766,29 @@ __ai 
__attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define splat_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_lane_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #endif @@ -3776,22 +3797,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #else #define splat_lane_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_splat_lane_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #endif @@ -3799,29 +3820,29 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s0 = __p0; \ - __ret = (float64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 10)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; 
\ float32x2_t __s0 = __p0; \ - __ret = (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 9)); \ __ret; \ }) #else #define splat_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_splat_lane_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 9)); \ __ret; \ }) #endif @@ -3830,22 +3851,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 8)); \ __ret; \ }) #else #define splat_lane_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_lane_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 8)); \ __ret; \ }) #endif @@ -3854,22 +3875,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define splat_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_64_32); \ __ret; \ }) #define __noswap_splat_lane_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #endif @@ -3877,29 +3898,29 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_lane_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define splat_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define splat_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_lane_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #endif @@ -3908,22 +3929,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = (poly8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 36); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 36)); \ __ret; \ }) #else #define splat_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x16_t __s0 = __p0; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_splat_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = (poly8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 36); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 36)); \ __ret; \ }) #endif @@ -3932,21 +3953,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t 
vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = (poly64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 38); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 38)); \ __ret; \ }) #else #define splat_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x2_t __s0 = __p0; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (poly64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 38); \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 38)); \ __ret; \ }) #define __noswap_splat_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = (poly64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 38); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 38)); \ __ret; \ }) #endif @@ -3955,22 +3976,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = (poly16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 37); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 37)); \ __ret; \ }) #else #define splat_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x8_t __s0 = __p0; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = (poly16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 37); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 37)); \ __ret; \ }) #endif @@ -3979,22 +4000,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = (poly8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 36)); \ __ret; \ }) #else #define splatq_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, 
__builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_splatq_laneq_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = (poly8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 36)); \ __ret; \ }) #endif @@ -4003,22 +4024,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = (poly64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 38)); \ __ret; \ }) #else #define splatq_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (poly64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_laneq_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = (poly64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 38)); \ __ret; \ }) #endif @@ -4027,22 +4048,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = (poly16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 37)); \ __ret; \ }) #else #define splatq_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_laneq_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = (poly16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 37)); \ __ret; \ }) #endif @@ -4051,22 +4072,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x16_t) 
__builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #else #define splatq_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_splatq_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #endif @@ -4075,22 +4096,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define splatq_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_splatq_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #endif @@ -4099,22 +4120,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define splatq_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, 
__builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #endif @@ -4123,22 +4144,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define splatq_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #endif @@ -4147,22 +4168,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #else #define splatq_laneq_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_splatq_laneq_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #endif @@ -4171,22 +4192,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_f64(__p0, __p1) __extension__ ({ \ 
float64x2_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 42)); \ __ret; \ }) #else #define splatq_laneq_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_laneq_f64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 42)); \ __ret; \ }) #endif @@ -4195,22 +4216,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 41)); \ __ret; \ }) #else #define splatq_laneq_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_splatq_laneq_f32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 41)); \ __ret; \ }) #endif @@ -4219,22 +4240,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 40)); \ __ret; \ }) #else #define splatq_laneq_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = 
__builtin_bit_cast(float16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_laneq_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 40)); \ __ret; \ }) #endif @@ -4243,22 +4264,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define splatq_laneq_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_splatq_laneq_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #endif @@ -4267,22 +4288,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define splatq_laneq_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_splatq_laneq_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #endif @@ -4291,22 +4312,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splatq_laneq_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x8_t) 
__builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define splatq_laneq_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_splatq_laneq_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #endif @@ -4315,22 +4336,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #else #define splat_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_splat_laneq_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #endif @@ -4339,22 +4360,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define splat_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, 
__rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_splat_laneq_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #endif @@ -4363,21 +4384,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define splat_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 51); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ __ret; \ }) #define __noswap_splat_laneq_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #endif @@ -4386,22 +4407,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define splat_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_laneq_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #endif @@ -4410,22 +4431,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #else #define splat_laneq_s8(__p0, __p1) __extension__ 
({ \ int8x8_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_splat_laneq_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #endif @@ -4434,21 +4455,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 42); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 42)); \ __ret; \ }) #else #define splat_laneq_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 42); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 42)); \ __ret; \ }) #define __noswap_splat_laneq_f64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 42); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 42)); \ __ret; \ }) #endif @@ -4457,22 +4478,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 41); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 41)); \ __ret; \ }) #else #define splat_laneq_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_splat_laneq_f32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 41); \ + __ret = __builtin_bit_cast(float32x2_t, 
__builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 41)); \ __ret; \ }) #endif @@ -4481,22 +4502,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 40); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 40)); \ __ret; \ }) #else #define splat_laneq_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_laneq_f16(__p0, __p1) __extension__ ({ \ float16x4_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 40); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 40)); \ __ret; \ }) #endif @@ -4505,22 +4526,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define splat_laneq_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_splat_laneq_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #endif @@ -4529,21 +4550,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define splat_laneq_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 
35); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ __ret; \ }) #define __noswap_splat_laneq_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #endif @@ -4552,22 +4573,22 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #define splat_laneq_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define splat_laneq_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_splat_laneq_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #endif @@ -4575,21 +4596,21 @@ __ai __attribute__((target("i8mm,neon"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t __noswap_vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) 
__builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #endif @@ -4597,21 +4618,21 @@ __ai __attribute__((target("neon"))) uint8x16_t __noswap_vabdq_u8(uint8x16_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #endif @@ -4619,21 +4640,21 @@ __ai __attribute__((target("neon"))) uint32x4_t __noswap_vabdq_u32(uint32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, 
__builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #endif @@ -4641,21 +4662,21 @@ __ai __attribute__((target("neon"))) uint16x8_t __noswap_vabdq_u16(uint16x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vabdq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vabdq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } __ai __attribute__((target("neon"))) int8x16_t __noswap_vabdq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #endif @@ -4663,16 +4684,16 @@ __ai __attribute__((target("neon"))) int8x16_t __noswap_vabdq_s8(int8x16_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vabdq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vabdq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -4680,21 +4701,21 @@ __ai __attribute__((target("neon"))) float32x4_t vabdq_f32(float32x4_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vabdq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) 
__builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vabdq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vabdq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #endif @@ -4702,21 +4723,21 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vabdq_s32(int32x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vabdq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vabdq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vabdq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #endif @@ -4724,21 +4745,21 @@ __ai __attribute__((target("neon"))) int16x8_t __noswap_vabdq_s16(int16x8_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vabd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), 
__builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vabd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vabd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #endif @@ -4746,21 +4767,21 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vabd_u8(uint8x8_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vabd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vabd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vabd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #endif @@ -4768,21 +4789,21 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vabd_u32(uint32x2_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vabd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vabd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 
= __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vabd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #endif @@ -4790,21 +4811,21 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vabd_u16(uint16x4_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vabd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vabd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vabd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #endif @@ -4812,16 +4833,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vabd_s8(int8x8_t __p0, in #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vabd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vabd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - 
__ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -4829,21 +4850,21 @@ __ai __attribute__((target("neon"))) float32x2_t vabd_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vabd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vabd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vabd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #endif @@ -4851,21 +4872,21 @@ __ai __attribute__((target("neon"))) int32x2_t __noswap_vabd_s32(int32x2_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vabd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vabd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vabd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) 
__builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #endif @@ -4873,15 +4894,15 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vabd_s16(int16x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vabsq_s8(int8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vabsq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -4889,15 +4910,15 @@ __ai __attribute__((target("neon"))) int8x16_t vabsq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vabsq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vabsq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -4905,15 +4926,15 @@ __ai __attribute__((target("neon"))) float32x4_t vabsq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vabsq_s32(int32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vabsq_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -4921,15 +4942,15 @@ __ai __attribute__((target("neon"))) int32x4_t vabsq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vabsq_s16(int16x8_t __p0) { int16x8_t 
__ret; - __ret = (int16x8_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vabsq_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -4937,15 +4958,15 @@ __ai __attribute__((target("neon"))) int16x8_t vabsq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vabs_s8(int8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vabs_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vabs_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -4953,15 +4974,15 @@ __ai __attribute__((target("neon"))) int8x8_t vabs_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vabs_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vabs_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -4969,15 +4990,15 @@ __ai __attribute__((target("neon"))) float32x2_t vabs_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vabs_s32(int32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vabs_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -4985,15 +5006,15 @@ __ai __attribute__((target("neon"))) int32x2_t vabs_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vabs_s16(int16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vabs_v((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vabs_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vabs_v((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -5007,10 +5028,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vaddq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -5024,10 +5045,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vaddq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -5041,10 +5062,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vaddq_u64(uint64x2_t __p0, uint6 #else __ai __attribute__((target("neon"))) uint64x2_t vaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); 
return __ret; } #endif @@ -5058,10 +5079,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vaddq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -5075,10 +5096,10 @@ __ai __attribute__((target("neon"))) int8x16_t vaddq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -5092,10 +5113,10 @@ __ai __attribute__((target("neon"))) float32x4_t vaddq_f32(float32x4_t __p0, flo #else __ai __attribute__((target("neon"))) float32x4_t vaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -5109,10 +5130,10 @@ __ai __attribute__((target("neon"))) int32x4_t vaddq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -5126,10 +5147,10 @@ __ai __attribute__((target("neon"))) int64x2_t vaddq_s64(int64x2_t __p0, int64x2 #else __ai __attribute__((target("neon"))) int64x2_t vaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -5143,10 +5164,10 @@ __ai __attribute__((target("neon"))) int16x8_t vaddq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -5160,10 +5181,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -5177,10 +5198,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vadd_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -5199,10 +5220,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vadd_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -5216,10 +5237,10 @@ __ai __attribute__((target("neon"))) int8x8_t vadd_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -5233,10 +5254,10 @@ __ai __attribute__((target("neon"))) float32x2_t vadd_f32(float32x2_t __p0, floa #else __ai __attribute__((target("neon"))) float32x2_t vadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -5250,10 +5271,10 @@ __ai __attribute__((target("neon"))) int32x2_t vadd_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -5272,10 +5293,10 @@ __ai __attribute__((target("neon"))) int16x4_t vadd_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -5283,38 +5304,38 @@ __ai __attribute__((target("neon"))) int16x4_t vadd_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vadd_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vadd_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 4); - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif __ai __attribute__((target("neon"))) poly64x1_t vadd_p64(poly64x1_t __p0, poly64x1_t __p1) { poly64x1_t __ret; - __ret = (poly64x1_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 6); + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 6)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x4_t vadd_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - __ret = (poly16x4_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 5); + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 5)); return __ret; } #else __ai __attribute__((target("neon"))) poly16x4_t vadd_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (poly16x4_t) __builtin_neon_vadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 5)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -5322,16 +5343,16 @@ __ai __attribute__((target("neon"))) poly16x4_t vadd_p16(poly16x4_t __p0, poly16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -5339,16 +5360,16 @@ __ai __attribute__((target("neon"))) 
poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly64x2_t vaddq_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - __ret = (poly64x2_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 38); + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 38)); return __ret; } #else __ai __attribute__((target("neon"))) poly64x2_t vaddq_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (poly64x2_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 38); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 38)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -5356,16 +5377,16 @@ __ai __attribute__((target("neon"))) poly64x2_t vaddq_p64(poly64x2_t __p0, poly6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x8_t vaddq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - __ret = (poly16x8_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 37); + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 37)); return __ret; } #else __ai __attribute__((target("neon"))) poly16x8_t vaddq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly16x8_t) __builtin_neon_vaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 37); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 37)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -5373,21 +5394,21 @@ __ai __attribute__((target("neon"))) poly16x8_t vaddq_p16(poly16x8_t __p0, poly1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + 
uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #endif @@ -5395,21 +5416,21 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vaddhn_u32(uint32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #endif @@ -5417,21 +5438,21 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vaddhn_u64(uint64x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #endif @@ -5439,21 +5460,21 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vaddhn_u16(uint16x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vaddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vaddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vaddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #endif @@ -5461,21 +5482,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vaddhn_s32(int32x4_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vaddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vaddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 
2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vaddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #endif @@ -5483,21 +5504,21 @@ __ai __attribute__((target("neon"))) int32x2_t __noswap_vaddhn_s64(int64x2_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vaddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vaddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vaddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vaddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #endif @@ -5511,10 +5532,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vandq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vandq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -5528,10 +5549,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vandq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vandq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -5545,10 +5566,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vandq_u64(uint64x2_t __p0, uint6 #else __ai __attribute__((target("neon"))) uint64x2_t vandq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -5562,10 +5583,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vandq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vandq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -5579,10 +5600,10 @@ __ai __attribute__((target("neon"))) int8x16_t vandq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vandq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -5596,10 +5617,10 @@ __ai __attribute__((target("neon"))) int32x4_t vandq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vandq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -5613,10 +5634,10 @@ __ai __attribute__((target("neon"))) int64x2_t vandq_s64(int64x2_t __p0, int64x2 #else __ai __attribute__((target("neon"))) int64x2_t vandq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - 
int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -5630,10 +5651,10 @@ __ai __attribute__((target("neon"))) int16x8_t vandq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vandq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -5647,10 +5668,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vand_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vand_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -5664,10 +5685,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vand_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vand_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -5686,10 +5707,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vand_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vand_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -5703,10 +5724,10 @@ __ai __attribute__((target("neon"))) int8x8_t 
vand_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vand_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -5720,10 +5741,10 @@ __ai __attribute__((target("neon"))) int32x2_t vand_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vand_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -5742,10 +5763,10 @@ __ai __attribute__((target("neon"))) int16x4_t vand_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vand_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 & __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -5759,10 +5780,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vbicq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vbicq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -5776,10 +5797,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vbicq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vbicq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); 
__ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -5793,10 +5814,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vbicq_u64(uint64x2_t __p0, uint6 #else __ai __attribute__((target("neon"))) uint64x2_t vbicq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -5810,10 +5831,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vbicq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vbicq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -5827,10 +5848,10 @@ __ai __attribute__((target("neon"))) int8x16_t vbicq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vbicq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -5844,10 +5865,10 @@ __ai __attribute__((target("neon"))) int32x4_t vbicq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vbicq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -5861,10 +5882,10 @@ __ai __attribute__((target("neon"))) int64x2_t vbicq_s64(int64x2_t __p0, int64x2 #else __ai __attribute__((target("neon"))) int64x2_t vbicq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -5878,10 +5899,10 @@ __ai __attribute__((target("neon"))) int16x8_t vbicq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vbicq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -5895,10 +5916,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vbic_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vbic_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -5912,10 +5933,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vbic_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vbic_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -5934,10 +5955,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vbic_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vbic_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -5951,10 +5972,10 @@ __ai __attribute__((target("neon"))) int8x8_t vbic_s8(int8x8_t 
__p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vbic_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -5968,10 +5989,10 @@ __ai __attribute__((target("neon"))) int32x2_t vbic_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vbic_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -5990,10 +6011,10 @@ __ai __attribute__((target("neon"))) int16x4_t vbic_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vbic_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 & ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -6001,17 +6022,17 @@ __ai __attribute__((target("neon"))) int16x4_t vbic_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t __p1, poly8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t __p1, poly8x8_t __p2) { poly8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + poly8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, 
__builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -6019,17 +6040,17 @@ __ai __attribute__((target("neon"))) poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x4_t vbsl_p16(uint16x4_t __p0, poly16x4_t __p1, poly16x4_t __p2) { poly16x4_t __ret; - __ret = (poly16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 5); + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 5)); return __ret; } #else __ai __attribute__((target("neon"))) poly16x4_t vbsl_p16(uint16x4_t __p0, poly16x4_t __p1, poly16x4_t __p2) { poly16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - poly16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (poly16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + poly16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 5)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -6037,17 +6058,17 @@ __ai __attribute__((target("neon"))) poly16x4_t vbsl_p16(uint16x4_t __p0, poly16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x16_t __p1, poly8x16_t __p2) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x16_t __p1, poly8x16_t __p2) { poly8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + poly8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 
__builtin_bit_cast(int8x16_t, __rev2), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6055,17 +6076,17 @@ __ai __attribute__((target("neon"))) poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x8_t vbslq_p16(uint16x8_t __p0, poly16x8_t __p1, poly16x8_t __p2) { poly16x8_t __ret; - __ret = (poly16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 37); + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 37)); return __ret; } #else __ai __attribute__((target("neon"))) poly16x8_t vbslq_p16(uint16x8_t __p0, poly16x8_t __p1, poly16x8_t __p2) { poly16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 37); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + poly16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 37)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6073,17 +6094,17 @@ __ai __attribute__((target("neon"))) poly16x8_t vbslq_p16(uint16x8_t __p0, poly1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 
48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6091,17 +6112,17 @@ __ai __attribute__((target("neon"))) uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vbslq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vbslq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6109,17 +6130,17 @@ __ai __attribute__((target("neon"))) uint32x4_t vbslq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vbslq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vbslq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -6127,17 +6148,17 @@ __ai __attribute__((target("neon"))) uint64x2_t vbslq_u64(uint64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) 
uint16x8_t vbslq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vbslq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6145,17 +6166,17 @@ __ai __attribute__((target("neon"))) uint16x8_t vbslq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vbslq_s8(uint8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vbslq_s8(uint8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6163,17 +6184,17 @@ __ai __attribute__((target("neon"))) int8x16_t vbslq_s8(uint8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vbslq_f32(uint32x4_t __p0, float32x4_t __p1, 
float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vbslq_f32(uint32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6181,17 +6202,17 @@ __ai __attribute__((target("neon"))) float32x4_t vbslq_f32(uint32x4_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vbslq_s32(uint32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vbslq_s32(uint32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6199,17 +6220,17 @@ __ai __attribute__((target("neon"))) int32x4_t vbslq_s32(uint32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vbslq_s64(uint64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), 
__builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vbslq_s64(uint64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -6217,17 +6238,17 @@ __ai __attribute__((target("neon"))) int64x2_t vbslq_s64(uint64x2_t __p0, int64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vbslq_s16(uint16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vbslq_s16(uint16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6235,17 +6256,17 @@ __ai __attribute__((target("neon"))) int16x8_t vbslq_s16(uint16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -6253,40 +6274,40 @@ __ai __attribute__((target("neon"))) uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vbsl_u64(uint64x1_t __p0, uint64x1_t __p1, uint64x1_t __p2) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; 
- uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -6294,17 +6315,17 @@ __ai __attribute__((target("neon"))) uint16x4_t vbsl_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -6312,17 +6333,17 @@ __ai __attribute__((target("neon"))) int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vbsl_f32(uint32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vbsl_f32(uint32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, 
(int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6330,40 +6351,40 @@ __ai __attribute__((target("neon"))) float32x2_t vbsl_f32(uint32x2_t __p0, float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int32x2_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vbsl_s64(uint64x1_t __p0, int64x1_t __p1, int64x1_t __p2) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 1); - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -6371,17 +6392,17 @@ __ai __attribute__((target("neon"))) int16x4_t vbsl_s16(uint16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #else __ai __attribute__((target("neon"))) float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6389,17 +6410,17 @@ __ai __attribute__((target("neon"))) float16x8_t vbslq_f16(uint16x8_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #else __ai __attribute__((target("neon"))) float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vbsl_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
__p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -6407,16 +6428,16 @@ __ai __attribute__((target("neon"))) float16x4_t vbsl_f16(uint16x4_t __p0, float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcageq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcageq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcageq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcageq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6424,16 +6445,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcageq_f32(float32x4_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcage_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcage_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcage_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcage_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcage_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6441,16 +6462,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcage_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcagtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcagtq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } 
#else __ai __attribute__((target("neon"))) uint32x4_t vcagtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcagtq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6458,16 +6479,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcagtq_f32(float32x4_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcagt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcagt_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcagt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcagt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcagt_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6475,16 +6496,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcagt_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcaleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcaleq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcaleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcaleq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6492,16 +6513,16 @@ __ai 
__attribute__((target("neon"))) uint32x4_t vcaleq_f32(float32x4_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcale_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcale_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcale_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcale_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcale_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6509,16 +6530,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcale_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcaltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcaltq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcaltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcaltq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6526,16 +6547,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcaltq_f32(float32x4_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcalt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcalt_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcalt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcalt_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcalt_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6543,16 +6564,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcalt_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -6560,16 +6581,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6577,16 +6598,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6594,16 +6615,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vceqq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vceqq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6611,16 +6632,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vceqq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vceqq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vceqq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6628,16 +6649,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vceqq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vceqq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vceqq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 == __rev1); 
+ __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6645,16 +6666,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vceqq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vceqq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vceqq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6662,16 +6683,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vceqq_f32(float32x4_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vceqq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vceqq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6679,16 +6700,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vceqq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vceqq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vceqq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6696,16 +6717,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vceqq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t 
__ret; - __ret = (uint8x8_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -6713,16 +6734,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vceq_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vceq_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6730,16 +6751,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vceq_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vceq_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vceq_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -6747,16 +6768,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vceq_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
__p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -6764,16 +6785,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vceq_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vceq_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6781,16 +6802,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vceq_f32(float32x2_t __p0, float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vceq_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vceq_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6798,16 +6819,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vceq_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vceq_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vceq_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, 
__ret, __lane_reverse_64_16); return __ret; } #endif @@ -6815,16 +6836,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vceq_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6832,16 +6853,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgeq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgeq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6849,16 +6870,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgeq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcgeq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcgeq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6866,16 +6887,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcgeq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -6883,16 +6904,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgeq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgeq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6900,16 +6921,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgeq_f32(float32x4_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgeq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgeq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -6917,16 +6938,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgeq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcgeq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 >= __p1); return __ret; } #else __ai 
__attribute__((target("neon"))) uint16x8_t vcgeq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -6934,16 +6955,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcgeq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -6951,16 +6972,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcge_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcge_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -6968,16 +6989,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcge_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcge_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcge_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 >= __rev1); - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -6985,16 +7006,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vcge_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7002,16 +7023,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcge_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcge_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7019,16 +7040,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcge_f32(float32x2_t __p0, float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcge_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcge_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7036,16 
+7057,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcge_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcge_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcge_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7053,16 +7074,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vcge_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7070,16 +7091,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgtq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgtq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7087,16 +7108,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgtq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcgtq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = 
(uint16x8_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcgtq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7104,16 +7125,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcgtq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7121,16 +7142,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7138,16 +7159,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgtq_f32(float32x4_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgtq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgtq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; 
- int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7155,16 +7176,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgtq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcgtq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcgtq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7172,16 +7193,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcgtq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcgt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcgt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7189,16 +7210,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vcgt_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcgt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcgt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + 
uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7206,16 +7227,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcgt_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcgt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcgt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7223,16 +7244,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vcgt_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7240,16 +7261,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcgt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcgt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7257,16 +7278,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcgt_f32(float32x2_t __p0, float #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) uint32x2_t vcgt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcgt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7274,16 +7295,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcgt_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcgt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcgt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7291,16 +7312,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vcgt_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7308,16 +7329,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcleq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t 
vcleq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7325,16 +7346,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcleq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcleq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcleq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7342,16 +7363,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcleq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcleq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcleq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7359,16 +7380,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcleq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7376,16 +7397,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcleq_f32(float32x4_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcleq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcleq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7393,16 +7414,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcleq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcleq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcleq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7410,16 +7431,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcleq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7427,16 +7448,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcle_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcle_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7444,16 +7465,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcle_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcle_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcle_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7461,16 +7482,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vcle_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7478,16 +7499,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcle_f32(float32x2_t __p0, 
float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcle_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7495,16 +7516,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcle_f32(float32x2_t __p0, float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcle_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcle_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7512,16 +7533,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vcle_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7529,15 +7550,15 @@ __ai __attribute__((target("neon"))) uint16x4_t vcle_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vclsq_u8(uint8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vclsq_u8(uint8x16_t __p0) { int8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 
2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7545,15 +7566,15 @@ __ai __attribute__((target("neon"))) int8x16_t vclsq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vclsq_u32(uint32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vclsq_u32(uint32x4_t __p0) { int32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7561,15 +7582,15 @@ __ai __attribute__((target("neon"))) int32x4_t vclsq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vclsq_u16(uint16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vclsq_u16(uint16x8_t __p0) { int16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7577,15 +7598,15 @@ __ai __attribute__((target("neon"))) int16x8_t vclsq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vclsq_s8(int8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vclsq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_8); return __ret; } #endif @@ -7593,15 +7614,15 @@ __ai __attribute__((target("neon"))) int8x16_t vclsq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vclsq_s32(int32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vclsq_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7609,15 +7630,15 @@ __ai __attribute__((target("neon"))) int32x4_t vclsq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vclsq_s16(int16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vclsq_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vclsq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7625,15 +7646,15 @@ __ai __attribute__((target("neon"))) int16x8_t vclsq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vcls_u8(uint8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vcls_u8(uint8x8_t __p0) { int8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7641,15 +7662,15 @@ __ai __attribute__((target("neon"))) int8x8_t vcls_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vcls_u32(uint32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vcls_u32(uint32x2_t __p0) { 
int32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7657,15 +7678,15 @@ __ai __attribute__((target("neon"))) int32x2_t vcls_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vcls_u16(uint16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vcls_u16(uint16x4_t __p0) { int16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7673,15 +7694,15 @@ __ai __attribute__((target("neon"))) int16x4_t vcls_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vcls_s8(int8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vcls_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7689,15 +7710,15 @@ __ai __attribute__((target("neon"))) int8x8_t vcls_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vcls_s32(int32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vcls_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7705,15 +7726,15 @@ __ai __attribute__((target("neon"))) int32x2_t vcls_s32(int32x2_t __p0) { 
#ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vcls_s16(int16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vcls_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vcls_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7721,16 +7742,16 @@ __ai __attribute__((target("neon"))) int16x4_t vcls_s16(int16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7738,16 +7759,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcltq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcltq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7755,16 +7776,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcltq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcltq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcltq_u16(uint16x8_t 
__p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7772,16 +7793,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcltq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcltq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint8x16_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcltq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7789,16 +7810,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vcltq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7806,16 +7827,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcltq_f32(float32x4_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcltq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint32x4_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcltq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = 
(uint32x4_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7823,16 +7844,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vcltq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcltq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint16x8_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcltq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -7840,16 +7861,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vcltq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7857,16 +7878,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vclt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vclt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 < __rev1); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7874,16 +7895,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vclt_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vclt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vclt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7891,16 +7912,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vclt_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint8x8_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -7908,16 +7929,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vclt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint32x2_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vclt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7925,16 +7946,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vclt_f32(float32x2_t __p0, float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vclt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0 < __p1); + __ret = 
__builtin_bit_cast(uint32x2_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vclt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -7942,16 +7963,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vclt_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vclt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint16x4_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vclt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -7959,15 +7980,15 @@ __ai __attribute__((target("neon"))) uint16x4_t vclt_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vclzq_u8(uint8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __p0), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vclzq_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __rev0), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -7975,15 +7996,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vclzq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vclzq_u32(uint32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vclzq_u32(uint32x4_t __p0) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 
50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -7991,15 +8012,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vclzq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vclzq_u16(uint16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vclzq_u16(uint16x8_t __p0) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -8007,15 +8028,15 @@ __ai __attribute__((target("neon"))) uint16x8_t vclzq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vclzq_s8(int8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vclzq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -8023,15 +8044,15 @@ __ai __attribute__((target("neon"))) int8x16_t vclzq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vclzq_s32(int32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vclzq_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -8039,15 +8060,15 @@ __ai __attribute__((target("neon"))) 
int32x4_t vclzq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vclzq_s16(int16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vclzq_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vclzq_v((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vclzq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -8055,15 +8076,15 @@ __ai __attribute__((target("neon"))) int16x8_t vclzq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vclz_u8(uint8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __p0), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vclz_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __rev0), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -8071,15 +8092,15 @@ __ai __attribute__((target("neon"))) uint8x8_t vclz_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vclz_u32(uint32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vclz_u32(uint32x2_t __p0) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -8087,15 +8108,15 @@ __ai __attribute__((target("neon"))) uint32x2_t vclz_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vclz_u16(uint16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vclz_u16(uint16x4_t __p0) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - 
__ret = (uint16x4_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -8103,15 +8124,15 @@ __ai __attribute__((target("neon"))) uint16x4_t vclz_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vclz_s8(int8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vclz_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -8119,15 +8140,15 @@ __ai __attribute__((target("neon"))) int8x8_t vclz_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vclz_s32(int32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vclz_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -8135,15 +8156,15 @@ __ai __attribute__((target("neon"))) int32x2_t vclz_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vclz_s16(int16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vclz_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vclz_v((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vclz_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -8151,15 +8172,15 @@ __ai __attribute__((target("neon"))) int16x4_t vclz_s16(int16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t 
vcnt_p8(poly8x8_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vcnt_v(__builtin_bit_cast(int8x8_t, __p0), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vcnt_p8(poly8x8_t __p0) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vcnt_v(__builtin_bit_cast(int8x8_t, __rev0), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -8167,15 +8188,15 @@ __ai __attribute__((target("neon"))) poly8x8_t vcnt_p8(poly8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vcntq_p8(poly8x16_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vcntq_v(__builtin_bit_cast(int8x16_t, __p0), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vcntq_p8(poly8x16_t __p0) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vcntq_v(__builtin_bit_cast(int8x16_t, __rev0), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -8183,15 +8204,15 @@ __ai __attribute__((target("neon"))) poly8x16_t vcntq_p8(poly8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcntq_u8(uint8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcntq_v(__builtin_bit_cast(int8x16_t, __p0), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcntq_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcntq_v(__builtin_bit_cast(int8x16_t, __rev0), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -8199,15 +8220,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vcntq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vcntq_s8(int8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vcntq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vcntq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 
= __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vcntq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vcntq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -8215,15 +8236,15 @@ __ai __attribute__((target("neon"))) int8x16_t vcntq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcnt_u8(uint8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcnt_v(__builtin_bit_cast(int8x8_t, __p0), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcnt_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcnt_v(__builtin_bit_cast(int8x8_t, __rev0), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -8231,15 +8252,15 @@ __ai __attribute__((target("neon"))) uint8x8_t vcnt_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vcnt_s8(int8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vcnt_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vcnt_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vcnt_v((int8x8_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vcnt_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -8253,10 +8274,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vcombine_p8(poly8x8_t __p0, poly #else __ai __attribute__((target("neon"))) poly8x16_t vcombine_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x16_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -8270,10 +8291,10 @@ __ai __attribute__((target("neon"))) poly16x8_t vcombine_p16(poly16x4_t __p0, po #else __ai 
__attribute__((target("neon"))) poly16x8_t vcombine_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x8_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -8287,10 +8308,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vcombine_u8(uint8x8_t __p0, uint #else __ai __attribute__((target("neon"))) uint8x16_t vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t __noswap_vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) { @@ -8309,10 +8330,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vcombine_u32(uint32x2_t __p0, ui #else __ai __attribute__((target("neon"))) uint32x4_t vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) { @@ -8332,7 +8353,7 @@ __ai __attribute__((target("neon"))) uint64x2_t vcombine_u64(uint64x1_t __p0, ui __ai __attribute__((target("neon"))) uint64x2_t vcombine_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -8346,10 +8367,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vcombine_u16(uint16x4_t __p0, ui #else __ai __attribute__((target("neon"))) uint16x8_t vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x8_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 
0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) { @@ -8368,10 +8389,10 @@ __ai __attribute__((target("neon"))) int8x16_t vcombine_s8(int8x8_t __p0, int8x8 #else __ai __attribute__((target("neon"))) int8x16_t vcombine_s8(int8x8_t __p0, int8x8_t __p1) { int8x16_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } __ai __attribute__((target("neon"))) int8x16_t __noswap_vcombine_s8(int8x8_t __p0, int8x8_t __p1) { @@ -8390,10 +8411,10 @@ __ai __attribute__((target("neon"))) float32x4_t vcombine_f32(float32x2_t __p0, #else __ai __attribute__((target("neon"))) float32x4_t vcombine_f32(float32x2_t __p0, float32x2_t __p1) { float32x4_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) float32x4_t __noswap_vcombine_f32(float32x2_t __p0, float32x2_t __p1) { @@ -8412,10 +8433,10 @@ __ai __attribute__((target("neon"))) float16x8_t vcombine_f16(float16x4_t __p0, #else __ai __attribute__((target("neon"))) float16x8_t vcombine_f16(float16x4_t __p0, float16x4_t __p1) { float16x8_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) float16x8_t __noswap_vcombine_f16(float16x4_t __p0, float16x4_t __p1) { @@ -8434,10 +8455,10 @@ __ai __attribute__((target("neon"))) int32x4_t vcombine_s32(int32x2_t __p0, int3 #else __ai __attribute__((target("neon"))) int32x4_t vcombine_s32(int32x2_t __p0, int32x2_t __p1) { int32x4_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
__p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vcombine_s32(int32x2_t __p0, int32x2_t __p1) { @@ -8457,7 +8478,7 @@ __ai __attribute__((target("neon"))) int64x2_t vcombine_s64(int64x1_t __p0, int6 __ai __attribute__((target("neon"))) int64x2_t vcombine_s64(int64x1_t __p0, int64x1_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -8471,10 +8492,10 @@ __ai __attribute__((target("neon"))) int16x8_t vcombine_s16(int16x4_t __p0, int1 #else __ai __attribute__((target("neon"))) int16x8_t vcombine_s16(int16x4_t __p0, int16x4_t __p1) { int16x8_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vcombine_s16(int16x4_t __p0, int16x4_t __p1) { @@ -8487,87 +8508,87 @@ __ai __attribute__((target("neon"))) int16x8_t __noswap_vcombine_s16(int16x4_t _ #define vcreate_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ uint64_t __promote = __p0; \ - __ret = (poly8x8_t)(__promote); \ + __ret = __builtin_bit_cast(poly8x8_t, __promote); \ __ret; \ }) #define vcreate_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ uint64_t __promote = __p0; \ - __ret = (poly16x4_t)(__promote); \ + __ret = __builtin_bit_cast(poly16x4_t, __promote); \ __ret; \ }) #define vcreate_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ uint64_t __promote = __p0; \ - __ret = (uint8x8_t)(__promote); \ + __ret = __builtin_bit_cast(uint8x8_t, __promote); \ __ret; \ }) #define vcreate_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ uint64_t __promote = __p0; \ - __ret = (uint32x2_t)(__promote); \ + __ret = __builtin_bit_cast(uint32x2_t, __promote); \ __ret; \ }) #define vcreate_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ uint64_t __promote = __p0; \ - __ret = (uint64x1_t)(__promote); \ + __ret = __builtin_bit_cast(uint64x1_t, __promote); \ __ret; \ }) #define vcreate_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ uint64_t __promote = __p0; \ - __ret = (uint16x4_t)(__promote); \ + __ret = __builtin_bit_cast(uint16x4_t, __promote); \ __ret; \ }) #define vcreate_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ uint64_t __promote = __p0; \ - __ret = (int8x8_t)(__promote); \ + __ret = __builtin_bit_cast(int8x8_t, __promote); \ __ret; \ }) #define vcreate_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ uint64_t __promote = __p0; \ - __ret = (float32x2_t)(__promote); \ + __ret = __builtin_bit_cast(float32x2_t, __promote); \ __ret; \ }) #define vcreate_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ uint64_t __promote = __p0; \ - __ret = (float16x4_t)(__promote); \ + __ret = __builtin_bit_cast(float16x4_t, __promote); \ __ret; \ }) #define vcreate_s32(__p0) 
__extension__ ({ \ int32x2_t __ret; \ uint64_t __promote = __p0; \ - __ret = (int32x2_t)(__promote); \ + __ret = __builtin_bit_cast(int32x2_t, __promote); \ __ret; \ }) #define vcreate_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ uint64_t __promote = __p0; \ - __ret = (int64x1_t)(__promote); \ + __ret = __builtin_bit_cast(int64x1_t, __promote); \ __ret; \ }) #define vcreate_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ uint64_t __promote = __p0; \ - __ret = (int16x4_t)(__promote); \ + __ret = __builtin_bit_cast(int16x4_t, __promote); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vcvtq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_f32_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vcvtq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_f32_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -8575,15 +8596,15 @@ __ai __attribute__((target("neon"))) float32x4_t vcvtq_f32_u32(uint32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vcvtq_f32_s32(int32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_f32_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vcvtq_f32_s32(int32x4_t __p0) { float32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_f32_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -8591,15 +8612,15 @@ __ai __attribute__((target("neon"))) float32x4_t vcvtq_f32_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_u32(uint32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_f32_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_u32(uint32x2_t __p0) { float32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_f32_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_64_32); return __ret; } #endif @@ -8607,15 +8628,15 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_f32_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { float32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_f32_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -8624,16 +8645,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvtq_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_n_f32_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define vcvtq_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_n_f32_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -8642,16 +8663,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvtq_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_n_f32_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define vcvtq_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vcvtq_n_f32_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvtq_n_f32_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -8660,16 +8681,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvt_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(float32x2_t, 
__builtin_neon_vcvt_n_f32_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vcvt_n_f32_u32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_n_f32_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -8678,16 +8699,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvt_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_n_f32_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vcvt_n_f32_s32(__p0, __p1) __extension__ ({ \ float32x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vcvt_n_f32_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_n_f32_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -8696,16 +8717,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvtq_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_vcvtq_n_s32_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtq_n_s32_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define vcvtq_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vcvtq_n_s32_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtq_n_s32_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -8714,16 +8735,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvt_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vcvt_n_s32_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvt_n_s32_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vcvt_n_s32_f32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ float32x2_t __s0 = __p0; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vcvt_n_s32_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + 
float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvt_n_s32_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -8732,16 +8753,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvtq_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vcvtq_n_u32_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtq_n_u32_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define vcvtq_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vcvtq_n_u32_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtq_n_u32_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -8750,16 +8771,16 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #define vcvt_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vcvt_n_u32_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvt_n_u32_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vcvt_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ float32x2_t __s0 = __p0; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vcvt_n_u32_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvt_n_u32_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -8767,15 +8788,15 @@ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vcvtq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtq_s32_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtq_s32_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vcvtq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vcvtq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtq_s32_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -8783,15 +8804,15 @@ __ai __attribute__((target("neon"))) int32x4_t vcvtq_s32_f32(float32x4_t __p0) 
{ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vcvt_s32_f32(float32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvt_s32_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvt_s32_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vcvt_s32_f32(float32x2_t __p0) { int32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vcvt_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvt_s32_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -8799,15 +8820,15 @@ __ai __attribute__((target("neon"))) int32x2_t vcvt_s32_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcvtq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtq_u32_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtq_u32_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcvtq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcvtq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtq_u32_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -8815,15 +8836,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcvtq_u32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvt_u32_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvt_u32_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcvt_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvt_u32_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -8839,9 +8860,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_p8(__p0_9, __p1_9) __extension__ ({ \ poly8x8_t __ret_9; \ poly8x8_t __s0_9 = __p0_9; \ - poly8x8_t __rev0_9; __rev0_9 = __builtin_shufflevector(__s0_9, __s0_9, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0_9; __rev0_9 = __builtin_shufflevector(__s0_9, __s0_9, __lane_reverse_64_8); \ __ret_9 = __noswap_splat_lane_p8(__rev0_9, __p1_9); \ - __ret_9 = __builtin_shufflevector(__ret_9, __ret_9, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_9 = 
__builtin_shufflevector(__ret_9, __ret_9, __lane_reverse_64_8); \ __ret_9; \ }) #endif @@ -8857,9 +8878,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_p16(__p0_11, __p1_11) __extension__ ({ \ poly16x4_t __ret_11; \ poly16x4_t __s0_11 = __p0_11; \ - poly16x4_t __rev0_11; __rev0_11 = __builtin_shufflevector(__s0_11, __s0_11, 3, 2, 1, 0); \ + poly16x4_t __rev0_11; __rev0_11 = __builtin_shufflevector(__s0_11, __s0_11, __lane_reverse_64_16); \ __ret_11 = __noswap_splat_lane_p16(__rev0_11, __p1_11); \ - __ret_11 = __builtin_shufflevector(__ret_11, __ret_11, 3, 2, 1, 0); \ + __ret_11 = __builtin_shufflevector(__ret_11, __ret_11, __lane_reverse_64_16); \ __ret_11; \ }) #endif @@ -8875,9 +8896,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_p8(__p0_13, __p1_13) __extension__ ({ \ poly8x16_t __ret_13; \ poly8x8_t __s0_13 = __p0_13; \ - poly8x8_t __rev0_13; __rev0_13 = __builtin_shufflevector(__s0_13, __s0_13, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0_13; __rev0_13 = __builtin_shufflevector(__s0_13, __s0_13, __lane_reverse_64_8); \ __ret_13 = __noswap_splatq_lane_p8(__rev0_13, __p1_13); \ - __ret_13 = __builtin_shufflevector(__ret_13, __ret_13, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_13 = __builtin_shufflevector(__ret_13, __ret_13, __lane_reverse_128_8); \ __ret_13; \ }) #endif @@ -8893,9 +8914,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_p16(__p0_15, __p1_15) __extension__ ({ \ poly16x8_t __ret_15; \ poly16x4_t __s0_15 = __p0_15; \ - poly16x4_t __rev0_15; __rev0_15 = __builtin_shufflevector(__s0_15, __s0_15, 3, 2, 1, 0); \ + poly16x4_t __rev0_15; __rev0_15 = __builtin_shufflevector(__s0_15, __s0_15, __lane_reverse_64_16); \ __ret_15 = __noswap_splatq_lane_p16(__rev0_15, __p1_15); \ - __ret_15 = __builtin_shufflevector(__ret_15, __ret_15, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_15 = __builtin_shufflevector(__ret_15, __ret_15, __lane_reverse_128_16); \ __ret_15; \ }) #endif @@ -8911,9 +8932,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_u8(__p0_17, __p1_17) __extension__ ({ \ uint8x16_t __ret_17; \ uint8x8_t __s0_17 = __p0_17; \ - uint8x8_t __rev0_17; __rev0_17 = __builtin_shufflevector(__s0_17, __s0_17, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0_17; __rev0_17 = __builtin_shufflevector(__s0_17, __s0_17, __lane_reverse_64_8); \ __ret_17 = __noswap_splatq_lane_u8(__rev0_17, __p1_17); \ - __ret_17 = __builtin_shufflevector(__ret_17, __ret_17, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_17 = __builtin_shufflevector(__ret_17, __ret_17, __lane_reverse_128_8); \ __ret_17; \ }) #endif @@ -8929,9 +8950,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_u32(__p0_19, __p1_19) __extension__ ({ \ uint32x4_t __ret_19; \ uint32x2_t __s0_19 = __p0_19; \ - uint32x2_t __rev0_19; __rev0_19 = __builtin_shufflevector(__s0_19, __s0_19, 1, 0); \ + uint32x2_t __rev0_19; __rev0_19 = __builtin_shufflevector(__s0_19, __s0_19, __lane_reverse_64_32); \ __ret_19 = __noswap_splatq_lane_u32(__rev0_19, __p1_19); \ - __ret_19 = __builtin_shufflevector(__ret_19, __ret_19, 3, 2, 1, 0); \ + __ret_19 = __builtin_shufflevector(__ret_19, __ret_19, __lane_reverse_128_32); \ __ret_19; \ }) #endif @@ -8948,7 +8969,7 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { uint64x2_t 
__ret_21; \ uint64x1_t __s0_21 = __p0_21; \ __ret_21 = __noswap_splatq_lane_u64(__s0_21, __p1_21); \ - __ret_21 = __builtin_shufflevector(__ret_21, __ret_21, 1, 0); \ + __ret_21 = __builtin_shufflevector(__ret_21, __ret_21, __lane_reverse_128_64); \ __ret_21; \ }) #endif @@ -8964,9 +8985,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_u16(__p0_23, __p1_23) __extension__ ({ \ uint16x8_t __ret_23; \ uint16x4_t __s0_23 = __p0_23; \ - uint16x4_t __rev0_23; __rev0_23 = __builtin_shufflevector(__s0_23, __s0_23, 3, 2, 1, 0); \ + uint16x4_t __rev0_23; __rev0_23 = __builtin_shufflevector(__s0_23, __s0_23, __lane_reverse_64_16); \ __ret_23 = __noswap_splatq_lane_u16(__rev0_23, __p1_23); \ - __ret_23 = __builtin_shufflevector(__ret_23, __ret_23, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_23 = __builtin_shufflevector(__ret_23, __ret_23, __lane_reverse_128_16); \ __ret_23; \ }) #endif @@ -8982,9 +9003,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_s8(__p0_25, __p1_25) __extension__ ({ \ int8x16_t __ret_25; \ int8x8_t __s0_25 = __p0_25; \ - int8x8_t __rev0_25; __rev0_25 = __builtin_shufflevector(__s0_25, __s0_25, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0_25; __rev0_25 = __builtin_shufflevector(__s0_25, __s0_25, __lane_reverse_64_8); \ __ret_25 = __noswap_splatq_lane_s8(__rev0_25, __p1_25); \ - __ret_25 = __builtin_shufflevector(__ret_25, __ret_25, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_25 = __builtin_shufflevector(__ret_25, __ret_25, __lane_reverse_128_8); \ __ret_25; \ }) #endif @@ -9000,9 +9021,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_f32(__p0_27, __p1_27) __extension__ ({ \ float32x4_t __ret_27; \ float32x2_t __s0_27 = __p0_27; \ - float32x2_t __rev0_27; __rev0_27 = __builtin_shufflevector(__s0_27, __s0_27, 1, 0); \ + float32x2_t __rev0_27; __rev0_27 = __builtin_shufflevector(__s0_27, __s0_27, __lane_reverse_64_32); \ __ret_27 = __noswap_splatq_lane_f32(__rev0_27, __p1_27); \ - __ret_27 = __builtin_shufflevector(__ret_27, __ret_27, 3, 2, 1, 0); \ + __ret_27 = __builtin_shufflevector(__ret_27, __ret_27, __lane_reverse_128_32); \ __ret_27; \ }) #endif @@ -9018,9 +9039,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_f16(__p0_29, __p1_29) __extension__ ({ \ float16x8_t __ret_29; \ float16x4_t __s0_29 = __p0_29; \ - float16x4_t __rev0_29; __rev0_29 = __builtin_shufflevector(__s0_29, __s0_29, 3, 2, 1, 0); \ + float16x4_t __rev0_29; __rev0_29 = __builtin_shufflevector(__s0_29, __s0_29, __lane_reverse_64_16); \ __ret_29 = __noswap_splatq_lane_f16(__rev0_29, __p1_29); \ - __ret_29 = __builtin_shufflevector(__ret_29, __ret_29, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_29 = __builtin_shufflevector(__ret_29, __ret_29, __lane_reverse_128_16); \ __ret_29; \ }) #endif @@ -9036,9 +9057,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_s32(__p0_31, __p1_31) __extension__ ({ \ int32x4_t __ret_31; \ int32x2_t __s0_31 = __p0_31; \ - int32x2_t __rev0_31; __rev0_31 = __builtin_shufflevector(__s0_31, __s0_31, 1, 0); \ + int32x2_t __rev0_31; __rev0_31 = __builtin_shufflevector(__s0_31, __s0_31, __lane_reverse_64_32); \ __ret_31 = __noswap_splatq_lane_s32(__rev0_31, __p1_31); \ - __ret_31 = __builtin_shufflevector(__ret_31, __ret_31, 3, 2, 1, 0); \ + __ret_31 = __builtin_shufflevector(__ret_31, __ret_31, 
__lane_reverse_128_32); \ __ret_31; \ }) #endif @@ -9055,7 +9076,7 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { int64x2_t __ret_33; \ int64x1_t __s0_33 = __p0_33; \ __ret_33 = __noswap_splatq_lane_s64(__s0_33, __p1_33); \ - __ret_33 = __builtin_shufflevector(__ret_33, __ret_33, 1, 0); \ + __ret_33 = __builtin_shufflevector(__ret_33, __ret_33, __lane_reverse_128_64); \ __ret_33; \ }) #endif @@ -9071,9 +9092,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdupq_lane_s16(__p0_35, __p1_35) __extension__ ({ \ int16x8_t __ret_35; \ int16x4_t __s0_35 = __p0_35; \ - int16x4_t __rev0_35; __rev0_35 = __builtin_shufflevector(__s0_35, __s0_35, 3, 2, 1, 0); \ + int16x4_t __rev0_35; __rev0_35 = __builtin_shufflevector(__s0_35, __s0_35, __lane_reverse_64_16); \ __ret_35 = __noswap_splatq_lane_s16(__rev0_35, __p1_35); \ - __ret_35 = __builtin_shufflevector(__ret_35, __ret_35, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_35 = __builtin_shufflevector(__ret_35, __ret_35, __lane_reverse_128_16); \ __ret_35; \ }) #endif @@ -9089,9 +9110,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_u8(__p0_37, __p1_37) __extension__ ({ \ uint8x8_t __ret_37; \ uint8x8_t __s0_37 = __p0_37; \ - uint8x8_t __rev0_37; __rev0_37 = __builtin_shufflevector(__s0_37, __s0_37, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0_37; __rev0_37 = __builtin_shufflevector(__s0_37, __s0_37, __lane_reverse_64_8); \ __ret_37 = __noswap_splat_lane_u8(__rev0_37, __p1_37); \ - __ret_37 = __builtin_shufflevector(__ret_37, __ret_37, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_37 = __builtin_shufflevector(__ret_37, __ret_37, __lane_reverse_64_8); \ __ret_37; \ }) #endif @@ -9107,9 +9128,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_u32(__p0_39, __p1_39) __extension__ ({ \ uint32x2_t __ret_39; \ uint32x2_t __s0_39 = __p0_39; \ - uint32x2_t __rev0_39; __rev0_39 = __builtin_shufflevector(__s0_39, __s0_39, 1, 0); \ + uint32x2_t __rev0_39; __rev0_39 = __builtin_shufflevector(__s0_39, __s0_39, __lane_reverse_64_32); \ __ret_39 = __noswap_splat_lane_u32(__rev0_39, __p1_39); \ - __ret_39 = __builtin_shufflevector(__ret_39, __ret_39, 1, 0); \ + __ret_39 = __builtin_shufflevector(__ret_39, __ret_39, __lane_reverse_64_32); \ __ret_39; \ }) #endif @@ -9131,9 +9152,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_u16(__p0_42, __p1_42) __extension__ ({ \ uint16x4_t __ret_42; \ uint16x4_t __s0_42 = __p0_42; \ - uint16x4_t __rev0_42; __rev0_42 = __builtin_shufflevector(__s0_42, __s0_42, 3, 2, 1, 0); \ + uint16x4_t __rev0_42; __rev0_42 = __builtin_shufflevector(__s0_42, __s0_42, __lane_reverse_64_16); \ __ret_42 = __noswap_splat_lane_u16(__rev0_42, __p1_42); \ - __ret_42 = __builtin_shufflevector(__ret_42, __ret_42, 3, 2, 1, 0); \ + __ret_42 = __builtin_shufflevector(__ret_42, __ret_42, __lane_reverse_64_16); \ __ret_42; \ }) #endif @@ -9149,9 +9170,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_s8(__p0_44, __p1_44) __extension__ ({ \ int8x8_t __ret_44; \ int8x8_t __s0_44 = __p0_44; \ - int8x8_t __rev0_44; __rev0_44 = __builtin_shufflevector(__s0_44, __s0_44, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0_44; __rev0_44 = __builtin_shufflevector(__s0_44, __s0_44, __lane_reverse_64_8); \ __ret_44 = __noswap_splat_lane_s8(__rev0_44, __p1_44); \ - __ret_44 = __builtin_shufflevector(__ret_44, 
__ret_44, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_44 = __builtin_shufflevector(__ret_44, __ret_44, __lane_reverse_64_8); \ __ret_44; \ }) #endif @@ -9167,9 +9188,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_f32(__p0_46, __p1_46) __extension__ ({ \ float32x2_t __ret_46; \ float32x2_t __s0_46 = __p0_46; \ - float32x2_t __rev0_46; __rev0_46 = __builtin_shufflevector(__s0_46, __s0_46, 1, 0); \ + float32x2_t __rev0_46; __rev0_46 = __builtin_shufflevector(__s0_46, __s0_46, __lane_reverse_64_32); \ __ret_46 = __noswap_splat_lane_f32(__rev0_46, __p1_46); \ - __ret_46 = __builtin_shufflevector(__ret_46, __ret_46, 1, 0); \ + __ret_46 = __builtin_shufflevector(__ret_46, __ret_46, __lane_reverse_64_32); \ __ret_46; \ }) #endif @@ -9185,9 +9206,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_f16(__p0_48, __p1_48) __extension__ ({ \ float16x4_t __ret_48; \ float16x4_t __s0_48 = __p0_48; \ - float16x4_t __rev0_48; __rev0_48 = __builtin_shufflevector(__s0_48, __s0_48, 3, 2, 1, 0); \ + float16x4_t __rev0_48; __rev0_48 = __builtin_shufflevector(__s0_48, __s0_48, __lane_reverse_64_16); \ __ret_48 = __noswap_splat_lane_f16(__rev0_48, __p1_48); \ - __ret_48 = __builtin_shufflevector(__ret_48, __ret_48, 3, 2, 1, 0); \ + __ret_48 = __builtin_shufflevector(__ret_48, __ret_48, __lane_reverse_64_16); \ __ret_48; \ }) #endif @@ -9203,9 +9224,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_s32(__p0_50, __p1_50) __extension__ ({ \ int32x2_t __ret_50; \ int32x2_t __s0_50 = __p0_50; \ - int32x2_t __rev0_50; __rev0_50 = __builtin_shufflevector(__s0_50, __s0_50, 1, 0); \ + int32x2_t __rev0_50; __rev0_50 = __builtin_shufflevector(__s0_50, __s0_50, __lane_reverse_64_32); \ __ret_50 = __noswap_splat_lane_s32(__rev0_50, __p1_50); \ - __ret_50 = __builtin_shufflevector(__ret_50, __ret_50, 1, 0); \ + __ret_50 = __builtin_shufflevector(__ret_50, __ret_50, __lane_reverse_64_32); \ __ret_50; \ }) #endif @@ -9227,9 +9248,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { #define vdup_lane_s16(__p0_53, __p1_53) __extension__ ({ \ int16x4_t __ret_53; \ int16x4_t __s0_53 = __p0_53; \ - int16x4_t __rev0_53; __rev0_53 = __builtin_shufflevector(__s0_53, __s0_53, 3, 2, 1, 0); \ + int16x4_t __rev0_53; __rev0_53 = __builtin_shufflevector(__s0_53, __s0_53, __lane_reverse_64_16); \ __ret_53 = __noswap_splat_lane_s16(__rev0_53, __p1_53); \ - __ret_53 = __builtin_shufflevector(__ret_53, __ret_53, 3, 2, 1, 0); \ + __ret_53 = __builtin_shufflevector(__ret_53, __ret_53, __lane_reverse_64_16); \ __ret_53; \ }) #endif @@ -9244,7 +9265,7 @@ __ai __attribute__((target("neon"))) poly8x8_t vdup_n_p8(poly8_t __p0) { __ai __attribute__((target("neon"))) poly8x8_t vdup_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -9259,7 +9280,7 @@ __ai __attribute__((target("neon"))) poly16x4_t vdup_n_p16(poly16_t __p0) { __ai __attribute__((target("neon"))) poly16x4_t vdup_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -9274,7 +9295,7 @@ __ai 
__attribute__((target("neon"))) poly8x16_t vdupq_n_p8(poly8_t __p0) { __ai __attribute__((target("neon"))) poly8x16_t vdupq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -9289,7 +9310,7 @@ __ai __attribute__((target("neon"))) poly16x8_t vdupq_n_p16(poly16_t __p0) { __ai __attribute__((target("neon"))) poly16x8_t vdupq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -9304,7 +9325,7 @@ __ai __attribute__((target("neon"))) uint8x16_t vdupq_n_u8(uint8_t __p0) { __ai __attribute__((target("neon"))) uint8x16_t vdupq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -9319,7 +9340,7 @@ __ai __attribute__((target("neon"))) uint32x4_t vdupq_n_u32(uint32_t __p0) { __ai __attribute__((target("neon"))) uint32x4_t vdupq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -9334,7 +9355,7 @@ __ai __attribute__((target("neon"))) uint64x2_t vdupq_n_u64(uint64_t __p0) { __ai __attribute__((target("neon"))) uint64x2_t vdupq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -9349,7 +9370,7 @@ __ai __attribute__((target("neon"))) uint16x8_t vdupq_n_u16(uint16_t __p0) { __ai __attribute__((target("neon"))) uint16x8_t vdupq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -9364,7 +9385,7 @@ __ai __attribute__((target("neon"))) int8x16_t vdupq_n_s8(int8_t __p0) { __ai __attribute__((target("neon"))) int8x16_t vdupq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -9379,7 +9400,7 @@ __ai __attribute__((target("neon"))) float32x4_t vdupq_n_f32(float32_t __p0) { __ai __attribute__((target("neon"))) float32x4_t vdupq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -9396,7 +9417,7 @@ 
__ai __attribute__((target("neon"))) float32x4_t vdupq_n_f32(float32_t __p0) { float16x8_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -9411,7 +9432,7 @@ __ai __attribute__((target("neon"))) int32x4_t vdupq_n_s32(int32_t __p0) { __ai __attribute__((target("neon"))) int32x4_t vdupq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -9426,7 +9447,7 @@ __ai __attribute__((target("neon"))) int64x2_t vdupq_n_s64(int64_t __p0) { __ai __attribute__((target("neon"))) int64x2_t vdupq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -9441,7 +9462,7 @@ __ai __attribute__((target("neon"))) int16x8_t vdupq_n_s16(int16_t __p0) { __ai __attribute__((target("neon"))) int16x8_t vdupq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -9456,7 +9477,7 @@ __ai __attribute__((target("neon"))) uint8x8_t vdup_n_u8(uint8_t __p0) { __ai __attribute__((target("neon"))) uint8x8_t vdup_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -9471,7 +9492,7 @@ __ai __attribute__((target("neon"))) uint32x2_t vdup_n_u32(uint32_t __p0) { __ai __attribute__((target("neon"))) uint32x2_t vdup_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -9491,7 +9512,7 @@ __ai __attribute__((target("neon"))) uint16x4_t vdup_n_u16(uint16_t __p0) { __ai __attribute__((target("neon"))) uint16x4_t vdup_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -9506,7 +9527,7 @@ __ai __attribute__((target("neon"))) int8x8_t vdup_n_s8(int8_t __p0) { __ai __attribute__((target("neon"))) int8x8_t vdup_n_s8(int8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -9521,7 +9542,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdup_n_f32(float32_t __p0) { __ai __attribute__((target("neon"))) float32x2_t vdup_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ 
-9538,7 +9559,7 @@ __ai __attribute__((target("neon"))) float32x2_t vdup_n_f32(float32_t __p0) { float16x4_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -9553,7 +9574,7 @@ __ai __attribute__((target("neon"))) int32x2_t vdup_n_s32(int32_t __p0) { __ai __attribute__((target("neon"))) int32x2_t vdup_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -9573,7 +9594,7 @@ __ai __attribute__((target("neon"))) int16x4_t vdup_n_s16(int16_t __p0) { __ai __attribute__((target("neon"))) int16x4_t vdup_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -9587,10 +9608,10 @@ __ai __attribute__((target("neon"))) uint8x16_t veorq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t veorq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -9604,10 +9625,10 @@ __ai __attribute__((target("neon"))) uint32x4_t veorq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t veorq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -9621,10 +9642,10 @@ __ai __attribute__((target("neon"))) uint64x2_t veorq_u64(uint64x2_t __p0, uint6 #else __ai __attribute__((target("neon"))) uint64x2_t veorq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -9638,10 +9659,10 @@ __ai __attribute__((target("neon"))) uint16x8_t veorq_u16(uint16x8_t __p0, uint1 #else __ai 
__attribute__((target("neon"))) uint16x8_t veorq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -9655,10 +9676,10 @@ __ai __attribute__((target("neon"))) int8x16_t veorq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t veorq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -9672,10 +9693,10 @@ __ai __attribute__((target("neon"))) int32x4_t veorq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t veorq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -9689,10 +9710,10 @@ __ai __attribute__((target("neon"))) int64x2_t veorq_s64(int64x2_t __p0, int64x2 #else __ai __attribute__((target("neon"))) int64x2_t veorq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -9706,10 +9727,10 @@ __ai __attribute__((target("neon"))) int16x8_t veorq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t veorq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = 
__rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -9723,10 +9744,10 @@ __ai __attribute__((target("neon"))) uint8x8_t veor_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t veor_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -9740,10 +9761,10 @@ __ai __attribute__((target("neon"))) uint32x2_t veor_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t veor_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -9762,10 +9783,10 @@ __ai __attribute__((target("neon"))) uint16x4_t veor_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t veor_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -9779,10 +9800,10 @@ __ai __attribute__((target("neon"))) int8x8_t veor_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t veor_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -9796,10 +9817,10 @@ __ai __attribute__((target("neon"))) int32x2_t veor_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t veor_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -9818,10 +9839,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 ^ __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -9831,7 +9852,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - __ret = (poly8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 4)); \ __ret; \ }) #else @@ -9839,10 +9860,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -9852,7 +9873,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - __ret = (poly16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 5)); \ __ret; \ }) #else @@ -9860,10 +9881,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (poly16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = 
__builtin_bit_cast(poly16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -9873,7 +9894,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -9881,10 +9902,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -9894,7 +9915,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -9902,10 +9923,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -9915,7 +9936,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ 
__ret; \ }) #else @@ -9923,10 +9944,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -9936,7 +9957,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 50)); \ __ret; \ }) #else @@ -9944,10 +9965,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -9957,7 +9978,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else @@ -9965,10 +9986,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret 
= __builtin_bit_cast(uint64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -9978,7 +9999,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -9986,10 +10007,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -9999,7 +10020,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -10007,10 +10028,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -10020,7 +10041,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ - __ret = (float32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 41)); 
\ __ret; \ }) #else @@ -10028,10 +10049,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float32x4_t __ret; \ float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -10041,7 +10062,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else @@ -10049,10 +10070,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -10062,7 +10083,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 35)); \ __ret; \ }) #else @@ -10070,10 +10091,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, 
__rev1), __p2, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -10083,7 +10104,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -10091,10 +10112,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -10104,7 +10125,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 16)); \ __ret; \ }) #else @@ -10112,10 +10133,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -10125,7 +10146,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 18)); \ __ret; \ }) #else @@ -10133,10 +10154,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -10145,7 +10166,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -10153,7 +10174,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 17)); \ __ret; \ }) #else @@ -10161,10 +10182,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -10174,7 +10195,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 0)); \ __ret; \ }) #else @@ -10182,10 +10203,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + int8x8_t 
__rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -10195,7 +10216,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ - __ret = (float32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 9)); \ __ret; \ }) #else @@ -10203,10 +10224,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float32x2_t __ret; \ float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -10216,7 +10237,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -10224,10 +10245,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -10236,7 +10257,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -10244,7 +10265,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int16x4_t 
__ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -10252,10 +10273,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -10265,7 +10286,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 40)); \ __ret; \ }) #else @@ -10273,10 +10294,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -10286,7 +10307,7 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ - __ret = (float16x4_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 8)); \ __ret; \ }) #else @@ -10294,10 +10315,10 @@ __ai __attribute__((target("neon"))) int16x4_t veor_s16(int16x4_t __p0, int16x4_ float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vext_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 8); \ - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -10311,9 +10332,9 @@ __ai __attribute__((target("neon"))) poly8x8_t vget_high_p8(poly8x16_t __p0) { #else __ai __attribute__((target("neon"))) poly8x8_t vget_high_p8(poly8x16_t __p0) { poly8x8_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t __noswap_vget_high_p8(poly8x16_t __p0) { @@ -10332,9 +10353,9 @@ __ai __attribute__((target("neon"))) poly16x4_t vget_high_p16(poly16x8_t __p0) { #else __ai __attribute__((target("neon"))) poly16x4_t vget_high_p16(poly16x8_t __p0) { poly16x4_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -10348,9 +10369,9 @@ __ai __attribute__((target("neon"))) uint8x8_t vget_high_u8(uint8x16_t __p0) { #else __ai __attribute__((target("neon"))) uint8x8_t vget_high_u8(uint8x16_t __p0) { uint8x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vget_high_u8(uint8x16_t __p0) { @@ -10369,9 +10390,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vget_high_u32(uint32x4_t __p0) { #else __ai __attribute__((target("neon"))) uint32x2_t vget_high_u32(uint32x4_t __p0) { uint32x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vget_high_u32(uint32x4_t __p0) { @@ -10390,7 +10411,7 @@ __ai __attribute__((target("neon"))) uint64x1_t vget_high_u64(uint64x2_t __p0) { #else __ai __attribute__((target("neon"))) uint64x1_t vget_high_u64(uint64x2_t __p0) { uint64x1_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } @@ -10405,9 +10426,9 @@ __ai __attribute__((target("neon"))) uint16x4_t vget_high_u16(uint16x8_t __p0) { #else __ai __attribute__((target("neon"))) uint16x4_t vget_high_u16(uint16x8_t __p0) { uint16x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vget_high_u16(uint16x8_t __p0) { @@ -10426,9 +10447,9 @@ __ai __attribute__((target("neon"))) int8x8_t vget_high_s8(int8x16_t __p0) { #else __ai __attribute__((target("neon"))) int8x8_t vget_high_s8(int8x16_t __p0) { int8x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vget_high_s8(int8x16_t __p0) { @@ -10447,9 +10468,9 @@ __ai __attribute__((target("neon"))) float32x2_t vget_high_f32(float32x4_t __p0) #else __ai __attribute__((target("neon"))) float32x2_t vget_high_f32(float32x4_t __p0) { float32x2_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) float32x2_t __noswap_vget_high_f32(float32x4_t __p0) { @@ -10468,9 +10489,9 @@ __ai __attribute__((target("neon"))) float16x4_t vget_high_f16(float16x8_t __p0) #else __ai __attribute__((target("neon"))) float16x4_t vget_high_f16(float16x8_t __p0) { float16x4_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) float16x4_t __noswap_vget_high_f16(float16x8_t __p0) { @@ -10489,9 +10510,9 @@ __ai __attribute__((target("neon"))) int32x2_t vget_high_s32(int32x4_t __p0) { #else __ai __attribute__((target("neon"))) int32x2_t vget_high_s32(int32x4_t __p0) { int32x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t 
__noswap_vget_high_s32(int32x4_t __p0) { @@ -10510,7 +10531,7 @@ __ai __attribute__((target("neon"))) int64x1_t vget_high_s64(int64x2_t __p0) { #else __ai __attribute__((target("neon"))) int64x1_t vget_high_s64(int64x2_t __p0) { int64x1_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } @@ -10525,9 +10546,9 @@ __ai __attribute__((target("neon"))) int16x4_t vget_high_s16(int16x8_t __p0) { #else __ai __attribute__((target("neon"))) int16x4_t vget_high_s16(int16x8_t __p0) { int16x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t __p0) { @@ -10541,21 +10562,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__s0, __p1)); \ __ret; \ }) #else #define vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__rev0, __p1); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__rev0, __p1)); \ __ret; \ }) #define __noswap_vget_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = (poly8_t) __builtin_neon_vget_lane_i8((poly8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vget_lane_i8(__s0, __p1)); \ __ret; \ }) #endif @@ -10564,21 +10585,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__s0, __p1)); \ __ret; \ }) #else #define vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__rev0, __p1); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__rev0, __p1)); \ __ret; \ }) #define __noswap_vget_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = (poly16_t) __builtin_neon_vget_lane_i16((poly16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vget_lane_i16(__s0, __p1)); \ __ret; \ }) #endif @@ -10587,21 +10608,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ 
poly8x16_t __s0 = __p0; \ - __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__s0, __p1)); \ __ret; \ }) #else #define vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__rev0, __p1); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__rev0, __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x16_t __s0 = __p0; \ - __ret = (poly8_t) __builtin_neon_vgetq_lane_i8((poly8x16_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vgetq_lane_i8(__s0, __p1)); \ __ret; \ }) #endif @@ -10610,21 +10631,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__s0, __p1)); \ __ret; \ }) #else #define vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__rev0, __p1); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__rev0, __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x8_t __s0 = __p0; \ - __ret = (poly16_t) __builtin_neon_vgetq_lane_i16((poly16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vgetq_lane_i16(__s0, __p1)); \ __ret; \ }) #endif @@ -10633,21 +10654,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10656,21 +10677,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ - 
__ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vgetq_lane_i32(__builtin_bit_cast(int32x4_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__rev0, __p1); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vgetq_lane_i32(__builtin_bit_cast(int32x4_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vgetq_lane_i32(__builtin_bit_cast(int32x4_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10679,21 +10700,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__rev0, __p1); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10702,21 +10723,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__rev0, __p1); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10725,21 +10746,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t 
#define vgetq_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__rev0, __p1); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vgetq_lane_i8((int8x16_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vgetq_lane_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10748,21 +10769,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vgetq_lane_f32(__s0, __p1)); \ __ret; \ }) #else #define vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__rev0, __p1); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vgetq_lane_f32(__rev0, __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x4_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vgetq_lane_f32(__s0, __p1)); \ __ret; \ }) #endif @@ -10771,21 +10792,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vgetq_lane_i32(__builtin_bit_cast(int32x4_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__rev0, __p1); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vgetq_lane_i32(__builtin_bit_cast(int32x4_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vgetq_lane_i32(__builtin_bit_cast(int32x4_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10794,21 +10815,21 @@ __ai __attribute__((target("neon"))) int16x4_t 
__noswap_vget_high_s16(int16x8_t #define vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__rev0, __p1); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vgetq_lane_i64((int64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vgetq_lane_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10817,21 +10838,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vgetq_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ __ret; \ }) #else #define vgetq_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__rev0, __p1); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vgetq_lane_i16((int16x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vgetq_lane_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10840,21 +10861,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #else #define vget_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, __p1); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vget_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10863,21 +10884,21 @@ __ai 
__attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vget_lane_i32(__builtin_bit_cast(int32x2_t, __s0), __p1)); \ __ret; \ }) #else #define vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__rev0, __p1); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vget_lane_i32(__builtin_bit_cast(int32x2_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vget_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vget_lane_i32(__builtin_bit_cast(int32x2_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10885,28 +10906,28 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vget_lane_i64((int64x1_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vget_lane_i64(__builtin_bit_cast(int64x1_t, __s0), __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #else #define vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__rev0, __p1); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vget_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10915,21 +10936,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #else #define vget_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__rev0, __p1); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \ 
__ret; \ }) #define __noswap_vget_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vget_lane_i8((int8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vget_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10938,21 +10959,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vget_lane_f32(__s0, __p1)); \ __ret; \ }) #else #define vget_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__rev0, __p1); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vget_lane_f32(__rev0, __p1)); \ __ret; \ }) #define __noswap_vget_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vget_lane_f32((float32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vget_lane_f32(__s0, __p1)); \ __ret; \ }) #endif @@ -10961,21 +10982,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vget_lane_i32(__builtin_bit_cast(int32x2_t, __s0), __p1)); \ __ret; \ }) #else #define vget_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__rev0, __p1); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vget_lane_i32(__builtin_bit_cast(int32x2_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vget_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vget_lane_i32((int32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vget_lane_i32(__builtin_bit_cast(int32x2_t, __s0), __p1)); \ __ret; \ }) #endif @@ -10983,28 +11004,28 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vget_high_s16(int16x8_t #define vget_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vget_lane_i64((int64x1_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vget_lane_i64(__builtin_bit_cast(int64x1_t, __s0), __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vget_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #else #define vget_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16_t) 
__builtin_neon_vget_lane_i16((int16x4_t)__rev0, __p1); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \ __ret; \ }) #define __noswap_vget_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vget_lane_i16((int16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vget_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #endif @@ -11018,9 +11039,9 @@ __ai __attribute__((target("neon"))) poly8x8_t vget_low_p8(poly8x16_t __p0) { #else __ai __attribute__((target("neon"))) poly8x8_t vget_low_p8(poly8x16_t __p0) { poly8x8_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -11034,9 +11055,9 @@ __ai __attribute__((target("neon"))) poly16x4_t vget_low_p16(poly16x8_t __p0) { #else __ai __attribute__((target("neon"))) poly16x4_t vget_low_p16(poly16x8_t __p0) { poly16x4_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11050,9 +11071,9 @@ __ai __attribute__((target("neon"))) uint8x8_t vget_low_u8(uint8x16_t __p0) { #else __ai __attribute__((target("neon"))) uint8x8_t vget_low_u8(uint8x16_t __p0) { uint8x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -11066,9 +11087,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vget_low_u32(uint32x4_t __p0) { #else __ai __attribute__((target("neon"))) uint32x2_t vget_low_u32(uint32x4_t __p0) { uint32x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -11082,7 +11103,7 @@ __ai __attribute__((target("neon"))) uint64x1_t vget_low_u64(uint64x2_t __p0) { #else __ai __attribute__((target("neon"))) uint64x1_t vget_low_u64(uint64x2_t __p0) { uint64x1_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } @@ -11097,9 +11118,9 @@ __ai 
__attribute__((target("neon"))) uint16x4_t vget_low_u16(uint16x8_t __p0) { #else __ai __attribute__((target("neon"))) uint16x4_t vget_low_u16(uint16x8_t __p0) { uint16x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11113,9 +11134,9 @@ __ai __attribute__((target("neon"))) int8x8_t vget_low_s8(int8x16_t __p0) { #else __ai __attribute__((target("neon"))) int8x8_t vget_low_s8(int8x16_t __p0) { int8x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -11129,9 +11150,9 @@ __ai __attribute__((target("neon"))) float32x2_t vget_low_f32(float32x4_t __p0) #else __ai __attribute__((target("neon"))) float32x2_t vget_low_f32(float32x4_t __p0) { float32x2_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -11145,9 +11166,9 @@ __ai __attribute__((target("neon"))) float16x4_t vget_low_f16(float16x8_t __p0) #else __ai __attribute__((target("neon"))) float16x4_t vget_low_f16(float16x8_t __p0) { float16x4_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11161,9 +11182,9 @@ __ai __attribute__((target("neon"))) int32x2_t vget_low_s32(int32x4_t __p0) { #else __ai __attribute__((target("neon"))) int32x2_t vget_low_s32(int32x4_t __p0) { int32x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -11177,7 +11198,7 @@ __ai __attribute__((target("neon"))) int64x1_t vget_low_s64(int64x2_t __p0) { #else __ai __attribute__((target("neon"))) int64x1_t vget_low_s64(int64x2_t __p0) { int64x1_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } @@ -11192,9 +11213,9 @@ __ai __attribute__((target("neon"))) int16x4_t vget_low_s16(int16x8_t __p0) { #else __ai 
__attribute__((target("neon"))) int16x4_t vget_low_s16(int16x8_t __p0) { int16x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11202,16 +11223,16 @@ __ai __attribute__((target("neon"))) int16x4_t vget_low_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -11219,16 +11240,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -11236,16 +11257,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vhaddq_u32(uint32x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = 
(uint16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -11253,16 +11274,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vhaddq_u16(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -11270,16 +11291,16 @@ __ai __attribute__((target("neon"))) int8x16_t vhaddq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -11287,16 +11308,16 @@ __ai __attribute__((target("neon"))) int32x4_t vhaddq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -11304,16 +11325,16 @@ __ai __attribute__((target("neon"))) int16x8_t vhaddq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -11321,16 +11342,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else 
__ai __attribute__((target("neon"))) uint32x2_t vhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -11338,16 +11359,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vhadd_u32(uint32x2_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11355,16 +11376,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vhadd_u16(uint16x4_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -11372,16 +11393,16 @@ __ai __attribute__((target("neon"))) int8x8_t 
vhadd_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -11389,16 +11410,16 @@ __ai __attribute__((target("neon"))) int32x2_t vhadd_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11406,16 +11427,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhadd_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 
3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -11423,16 +11444,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vhsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vhsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -11440,16 +11461,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vhsubq_u32(uint32x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vhsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vhsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -11457,16 +11478,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vhsubq_u16(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vhsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, 
__builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vhsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -11474,16 +11495,16 @@ __ai __attribute__((target("neon"))) int8x16_t vhsubq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vhsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vhsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -11491,16 +11512,16 @@ __ai __attribute__((target("neon"))) int32x4_t vhsubq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vhsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vhsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = 
__builtin_bit_cast(int16x8_t, __builtin_neon_vhsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -11508,16 +11529,16 @@ __ai __attribute__((target("neon"))) int16x8_t vhsubq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -11525,16 +11546,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vhsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vhsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -11542,16 +11563,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vhsub_u32(uint32x2_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vhsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vhsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11559,16 +11580,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vhsub_u16(uint16x4_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -11576,16 +11597,16 @@ __ai __attribute__((target("neon"))) int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vhsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vhsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -11593,16 +11614,16 @@ __ai __attribute__((target("neon"))) int32x2_t vhsub_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) 
__builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vhsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vhsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -11610,14 +11631,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vld1_v(__p0, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vld1_v(__p0, 4)); \ __ret; \ }) #else #define vld1_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vld1_v(__p0, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vld1_v(__p0, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -11625,14 +11646,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vld1_v(__p0, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vld1_v(__p0, 5)); \ __ret; \ }) #else #define vld1_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vld1_v(__p0, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vld1_v(__p0, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -11640,14 +11661,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vld1q_v(__p0, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vld1q_v(__p0, 36)); \ __ret; \ }) #else #define vld1q_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vld1q_v(__p0, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vld1q_v(__p0, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -11655,14 +11676,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_p16(__p0) __extension__ ({ \ poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vld1q_v(__p0, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vld1q_v(__p0, 37)); \ __ret; \ }) #else #define 
vld1q_p16(__p0) __extension__ ({ \ poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vld1q_v(__p0, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vld1q_v(__p0, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -11670,14 +11691,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vld1q_v(__p0, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vld1q_v(__p0, 48)); \ __ret; \ }) #else #define vld1q_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vld1q_v(__p0, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vld1q_v(__p0, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -11685,14 +11706,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vld1q_v(__p0, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vld1q_v(__p0, 50)); \ __ret; \ }) #else #define vld1q_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vld1q_v(__p0, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vld1q_v(__p0, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -11700,14 +11721,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vld1q_v(__p0, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vld1q_v(__p0, 51)); \ __ret; \ }) #else #define vld1q_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vld1q_v(__p0, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vld1q_v(__p0, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -11715,14 +11736,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vld1q_v(__p0, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vld1q_v(__p0, 49)); \ __ret; \ }) #else #define vld1q_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vld1q_v(__p0, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vld1q_v(__p0, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -11730,14 +11751,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vld1q_v(__p0, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vld1q_v(__p0, 32)); 
\ __ret; \ }) #else #define vld1q_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vld1q_v(__p0, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vld1q_v(__p0, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -11745,14 +11766,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vld1q_v(__p0, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vld1q_v(__p0, 41)); \ __ret; \ }) #else #define vld1q_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vld1q_v(__p0, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vld1q_v(__p0, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -11760,14 +11781,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vld1q_v(__p0, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vld1q_v(__p0, 34)); \ __ret; \ }) #else #define vld1q_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vld1q_v(__p0, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vld1q_v(__p0, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -11775,14 +11796,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vld1q_v(__p0, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vld1q_v(__p0, 35)); \ __ret; \ }) #else #define vld1q_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vld1q_v(__p0, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vld1q_v(__p0, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -11790,14 +11811,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vld1q_v(__p0, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vld1q_v(__p0, 33)); \ __ret; \ }) #else #define vld1q_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vld1q_v(__p0, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vld1q_v(__p0, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -11805,14 +11826,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vld1_v(__p0, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vld1_v(__p0, 16)); 
\ __ret; \ }) #else #define vld1_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vld1_v(__p0, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vld1_v(__p0, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -11820,34 +11841,34 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vld1_v(__p0, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vld1_v(__p0, 18)); \ __ret; \ }) #else #define vld1_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vld1_v(__p0, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vld1_v(__p0, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif #define vld1_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vld1_v(__p0, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vld1_v(__p0, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vld1_v(__p0, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vld1_v(__p0, 17)); \ __ret; \ }) #else #define vld1_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vld1_v(__p0, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vld1_v(__p0, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -11855,14 +11876,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vld1_v(__p0, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vld1_v(__p0, 0)); \ __ret; \ }) #else #define vld1_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vld1_v(__p0, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vld1_v(__p0, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -11870,14 +11891,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vld1_v(__p0, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vld1_v(__p0, 9)); \ __ret; \ }) #else #define vld1_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vld1_v(__p0, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vld1_v(__p0, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -11885,34 +11906,34 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vld1_v(__p0, 2); \ + __ret = __builtin_bit_cast(int32x2_t, 
__builtin_neon_vld1_v(__p0, 2)); \ __ret; \ }) #else #define vld1_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vld1_v(__p0, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vld1_v(__p0, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif #define vld1_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vld1_v(__p0, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vld1_v(__p0, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vld1_v(__p0, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vld1_v(__p0, 1)); \ __ret; \ }) #else #define vld1_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vld1_v(__p0, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vld1_v(__p0, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -11920,14 +11941,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vld1_dup_v(__p0, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vld1_dup_v(__p0, 4)); \ __ret; \ }) #else #define vld1_dup_p8(__p0) __extension__ ({ \ poly8x8_t __ret; \ - __ret = (poly8x8_t) __builtin_neon_vld1_dup_v(__p0, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vld1_dup_v(__p0, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -11935,14 +11956,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vld1_dup_v(__p0, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vld1_dup_v(__p0, 5)); \ __ret; \ }) #else #define vld1_dup_p16(__p0) __extension__ ({ \ poly16x4_t __ret; \ - __ret = (poly16x4_t) __builtin_neon_vld1_dup_v(__p0, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vld1_dup_v(__p0, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -11950,14 +11971,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vld1q_dup_v(__p0, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vld1q_dup_v(__p0, 36)); \ __ret; \ }) #else #define vld1q_dup_p8(__p0) __extension__ ({ \ poly8x16_t __ret; \ - __ret = (poly8x16_t) __builtin_neon_vld1q_dup_v(__p0, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vld1q_dup_v(__p0, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -11965,14 +11986,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_p16(__p0) 
__extension__ ({ \ poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vld1q_dup_v(__p0, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vld1q_dup_v(__p0, 37)); \ __ret; \ }) #else #define vld1q_dup_p16(__p0) __extension__ ({ \ poly16x8_t __ret; \ - __ret = (poly16x8_t) __builtin_neon_vld1q_dup_v(__p0, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vld1q_dup_v(__p0, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -11980,14 +12001,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vld1q_dup_v(__p0, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vld1q_dup_v(__p0, 48)); \ __ret; \ }) #else #define vld1q_dup_u8(__p0) __extension__ ({ \ uint8x16_t __ret; \ - __ret = (uint8x16_t) __builtin_neon_vld1q_dup_v(__p0, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vld1q_dup_v(__p0, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -11995,14 +12016,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vld1q_dup_v(__p0, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vld1q_dup_v(__p0, 50)); \ __ret; \ }) #else #define vld1q_dup_u32(__p0) __extension__ ({ \ uint32x4_t __ret; \ - __ret = (uint32x4_t) __builtin_neon_vld1q_dup_v(__p0, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vld1q_dup_v(__p0, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12010,14 +12031,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vld1q_dup_v(__p0, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vld1q_dup_v(__p0, 51)); \ __ret; \ }) #else #define vld1q_dup_u64(__p0) __extension__ ({ \ uint64x2_t __ret; \ - __ret = (uint64x2_t) __builtin_neon_vld1q_dup_v(__p0, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vld1q_dup_v(__p0, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -12025,14 +12046,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vld1q_dup_v(__p0, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vld1q_dup_v(__p0, 49)); \ __ret; \ }) #else #define vld1q_dup_u16(__p0) __extension__ ({ \ uint16x8_t __ret; \ - __ret = (uint16x8_t) __builtin_neon_vld1q_dup_v(__p0, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vld1q_dup_v(__p0, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ 
-12040,14 +12061,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vld1q_dup_v(__p0, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vld1q_dup_v(__p0, 32)); \ __ret; \ }) #else #define vld1q_dup_s8(__p0) __extension__ ({ \ int8x16_t __ret; \ - __ret = (int8x16_t) __builtin_neon_vld1q_dup_v(__p0, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vld1q_dup_v(__p0, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -12055,14 +12076,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vld1q_dup_v(__p0, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vld1q_dup_v(__p0, 41)); \ __ret; \ }) #else #define vld1q_dup_f32(__p0) __extension__ ({ \ float32x4_t __ret; \ - __ret = (float32x4_t) __builtin_neon_vld1q_dup_v(__p0, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vld1q_dup_v(__p0, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12070,14 +12091,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vld1q_dup_v(__p0, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vld1q_dup_v(__p0, 34)); \ __ret; \ }) #else #define vld1q_dup_s32(__p0) __extension__ ({ \ int32x4_t __ret; \ - __ret = (int32x4_t) __builtin_neon_vld1q_dup_v(__p0, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vld1q_dup_v(__p0, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12085,14 +12106,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vld1q_dup_v(__p0, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vld1q_dup_v(__p0, 35)); \ __ret; \ }) #else #define vld1q_dup_s64(__p0) __extension__ ({ \ int64x2_t __ret; \ - __ret = (int64x2_t) __builtin_neon_vld1q_dup_v(__p0, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vld1q_dup_v(__p0, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -12100,14 +12121,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vld1q_dup_v(__p0, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vld1q_dup_v(__p0, 33)); \ __ret; \ }) #else #define vld1q_dup_s16(__p0) __extension__ ({ \ int16x8_t __ret; \ - __ret = (int16x8_t) __builtin_neon_vld1q_dup_v(__p0, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int16x8_t, 
__builtin_neon_vld1q_dup_v(__p0, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -12115,14 +12136,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vld1_dup_v(__p0, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vld1_dup_v(__p0, 16)); \ __ret; \ }) #else #define vld1_dup_u8(__p0) __extension__ ({ \ uint8x8_t __ret; \ - __ret = (uint8x8_t) __builtin_neon_vld1_dup_v(__p0, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vld1_dup_v(__p0, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12130,34 +12151,34 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vld1_dup_v(__p0, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vld1_dup_v(__p0, 18)); \ __ret; \ }) #else #define vld1_dup_u32(__p0) __extension__ ({ \ uint32x2_t __ret; \ - __ret = (uint32x2_t) __builtin_neon_vld1_dup_v(__p0, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vld1_dup_v(__p0, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif #define vld1_dup_u64(__p0) __extension__ ({ \ uint64x1_t __ret; \ - __ret = (uint64x1_t) __builtin_neon_vld1_dup_v(__p0, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vld1_dup_v(__p0, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_dup_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vld1_dup_v(__p0, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vld1_dup_v(__p0, 17)); \ __ret; \ }) #else #define vld1_dup_u16(__p0) __extension__ ({ \ uint16x4_t __ret; \ - __ret = (uint16x4_t) __builtin_neon_vld1_dup_v(__p0, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vld1_dup_v(__p0, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12165,14 +12186,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vld1_dup_v(__p0, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vld1_dup_v(__p0, 0)); \ __ret; \ }) #else #define vld1_dup_s8(__p0) __extension__ ({ \ int8x8_t __ret; \ - __ret = (int8x8_t) __builtin_neon_vld1_dup_v(__p0, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vld1_dup_v(__p0, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12180,14 +12201,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_f32(__p0) __extension__ ({ \ float32x2_t __ret; \ - __ret = (float32x2_t) __builtin_neon_vld1_dup_v(__p0, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vld1_dup_v(__p0, 9)); \ __ret; \ }) #else #define vld1_dup_f32(__p0) __extension__ ({ \ float32x2_t __ret; 
\ - __ret = (float32x2_t) __builtin_neon_vld1_dup_v(__p0, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vld1_dup_v(__p0, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -12195,34 +12216,34 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vld1_dup_v(__p0, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vld1_dup_v(__p0, 2)); \ __ret; \ }) #else #define vld1_dup_s32(__p0) __extension__ ({ \ int32x2_t __ret; \ - __ret = (int32x2_t) __builtin_neon_vld1_dup_v(__p0, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vld1_dup_v(__p0, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif #define vld1_dup_s64(__p0) __extension__ ({ \ int64x1_t __ret; \ - __ret = (int64x1_t) __builtin_neon_vld1_dup_v(__p0, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vld1_dup_v(__p0, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_dup_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vld1_dup_v(__p0, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vld1_dup_v(__p0, 1)); \ __ret; \ }) #else #define vld1_dup_s16(__p0) __extension__ ({ \ int16x4_t __ret; \ - __ret = (int16x4_t) __builtin_neon_vld1_dup_v(__p0, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vld1_dup_v(__p0, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12231,16 +12252,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s1 = __p1; \ - __ret = (poly8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 4)); \ __ret; \ }) #else #define vld1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12249,16 +12270,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s1 = __p1; \ - __ret = (poly16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 5)); \ __ret; \ }) #else #define vld1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = 
(poly16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12267,16 +12288,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 36)); \ __ret; \ }) #else #define vld1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -12285,16 +12306,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 37)); \ __ret; \ }) #else #define vld1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -12303,16 +12324,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) #else #define vld1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -12321,16 +12342,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 50)); \ __ret; \ }) #else #define vld1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12339,16 +12360,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else #define vld1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -12357,16 +12378,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else #define vld1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, 
__rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -12375,16 +12396,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else #define vld1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s1 = __p1; \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -12393,16 +12414,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s1 = __p1; \ - __ret = (float32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 41)); \ __ret; \ }) #else #define vld1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32x4_t __s1 = __p1; \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12411,16 +12432,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else #define vld1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12429,16 +12450,16 @@ __ai __attribute__((target("neon"))) int16x4_t 
vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 35)); \ __ret; \ }) #else #define vld1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -12447,16 +12468,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else #define vld1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -12465,16 +12486,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 16)); \ __ret; \ }) #else #define vld1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12483,16 +12504,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, 
__builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 18)); \ __ret; \ }) #else #define vld1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -12500,23 +12521,23 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 17)); \ __ret; \ }) #else #define vld1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12525,16 +12546,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 0)); \ __ret; \ }) #else #define vld1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s1 = __p1; \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12543,16 +12564,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s1 = __p1; \ - __ret = (float32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 9); \ + 
__ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 9)); \ __ret; \ }) #else #define vld1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32x2_t __s1 = __p1; \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -12561,16 +12582,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else #define vld1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -12578,23 +12599,23 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else #define vld1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12610,8 +12631,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12627,8 +12648,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12644,8 +12665,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -12661,8 +12682,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -12678,8 +12699,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -12695,8 +12716,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12712,8 +12733,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], 
__ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -12729,8 +12750,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -12746,8 +12767,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -12763,8 +12784,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12780,8 +12801,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -12797,8 +12818,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -12814,8 +12835,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + 
__ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -12831,8 +12852,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12848,8 +12869,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -12870,8 +12891,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12887,8 +12908,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12904,8 +12925,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -12921,8 +12942,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -12943,8 +12964,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x2_t __ret; \ 
__builtin_neon_vld1_x2_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12960,9 +12981,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -12978,9 +12999,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -12996,9 +13017,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13014,9 +13035,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13032,9 +13053,9 @@ 
__ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13050,9 +13071,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13068,9 +13089,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -13086,9 +13107,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13104,9 +13125,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13122,9 +13143,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13140,9 +13161,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13158,9 +13179,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -13176,9 +13197,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13194,9 +13215,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 
0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13212,9 +13233,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13235,9 +13256,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -13253,9 +13274,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13271,9 +13292,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13289,9 +13310,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x3_t __ret; \ 
__builtin_neon_vld1_x3_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13312,9 +13333,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -13330,10 +13351,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13349,10 +13370,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -13368,10 +13389,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13387,10 +13408,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13406,10 +13427,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13425,10 +13446,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = 
__builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13444,10 +13465,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -13463,10 +13484,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13482,10 +13503,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13501,10 +13522,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13520,10 +13541,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13539,10 +13560,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -13558,10 +13579,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13577,10 +13598,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = 
__builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13596,10 +13617,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13620,10 +13641,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -13639,10 +13660,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13658,10 +13679,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 
1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13677,10 +13698,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13701,10 +13722,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -13720,8 +13741,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13737,8 +13758,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -13754,8 
+13775,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13771,8 +13792,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13788,8 +13809,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -13805,8 +13826,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13822,8 +13843,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13839,8 +13860,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 
__lane_reverse_128_8); \ __ret; \ }) #endif @@ -13856,8 +13877,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13873,8 +13894,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -13890,8 +13911,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -13907,8 +13928,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13924,8 +13945,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13946,8 +13967,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -13963,8 +13984,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 0); \ \ - __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -13980,8 +14001,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -13997,8 +14018,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14019,8 +14040,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14036,8 +14057,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14053,8 +14074,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14070,8 +14091,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + 
__ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -14087,8 +14108,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14104,8 +14125,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -14121,8 +14142,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14138,8 +14159,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -14155,8 +14176,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14172,8 +14193,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -14189,8 +14210,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14206,8 +14227,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14223,8 +14244,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -14240,8 +14261,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14257,8 +14278,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14274,8 +14295,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14296,8 +14317,8 @@ 
__ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14313,8 +14334,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14330,8 +14351,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14347,8 +14368,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14369,8 +14390,8 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14379,7 +14400,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x2_t __ret; \ poly8x8x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 4); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 4); \ __ret; \ }) #else @@ -14387,12 +14408,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x2_t __ret; \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 
0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14401,7 +14422,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x2_t __ret; \ poly16x4x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 5); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 5); \ __ret; \ }) #else @@ -14409,12 +14430,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x2_t __ret; \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14423,7 +14444,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x2_t __ret; \ poly16x8x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 37); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 37); \ __ret; \ }) #else @@ -14431,12 +14452,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x2_t __ret; \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14445,7 +14466,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x2_t __ret; \ uint32x4x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 50); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 50); \ __ret; \ }) #else @@ -14453,12 +14474,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x2_t __ret; \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14467,7 +14488,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x2_t __ret; \ uint16x8x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 49); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 49); \ __ret; \ }) #else @@ -14475,12 +14496,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x2_t __ret; \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, 
__rev1.val[1]), __p2, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14489,7 +14510,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __ret; \ float32x4x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 41); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 41); \ __ret; \ }) #else @@ -14497,12 +14518,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x2_t __ret; \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14511,7 +14532,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __ret; \ int32x4x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 34); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 34); \ __ret; \ }) #else @@ -14519,12 +14540,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x2_t __ret; \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14533,7 +14554,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __ret; \ int16x8x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 33); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 33); \ __ret; \ }) #else @@ -14541,12 +14562,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x2_t __ret; \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14555,7 +14576,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x2_t __ret; \ uint8x8x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 16); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 16); \ __ret; \ }) #else @@ -14563,12 +14584,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x2_t __ret; \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ 
}) #endif @@ -14577,7 +14598,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x2_t __ret; \ uint32x2x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 18); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 18); \ __ret; \ }) #else @@ -14585,12 +14606,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x2_t __ret; \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14599,7 +14620,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x2_t __ret; \ uint16x4x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 17); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 17); \ __ret; \ }) #else @@ -14607,12 +14628,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x2_t __ret; \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14621,7 +14642,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x2_t __ret; \ int8x8x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], 
(int8x8_t)__s1.val[1], __p2, 0); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 0); \ __ret; \ }) #else @@ -14629,12 +14650,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x2_t __ret; \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14643,7 +14664,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __ret; \ float32x2x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 9); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 9); \ __ret; \ }) #else @@ -14651,12 +14672,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x2_t __ret; \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14665,7 +14686,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __ret; \ int32x2x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 2); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 2); \ __ret; \ }) #else @@ -14673,12 +14694,12 @@ __ai __attribute__((target("neon"))) int16x4_t 
vhsub_s16(int16x4_t __p0, int16x4 int32x2x2_t __ret; \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14687,7 +14708,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __ret; \ int16x4x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 1); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 1); \ __ret; \ }) #else @@ -14695,12 +14716,12 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x2_t __ret; \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14716,9 +14737,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14734,9 +14755,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 
poly16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14752,9 +14773,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -14770,9 +14791,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14788,9 +14809,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -14806,9 +14827,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14824,9 +14845,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14842,9 +14863,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -14860,9 +14881,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14878,9 +14899,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -14896,9 +14917,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 33); \ \ - __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -14914,9 +14935,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14932,9 +14953,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -14955,9 +14976,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -14973,9 +14994,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -14991,9 +15012,9 @@ __ai 
__attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15009,9 +15030,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15032,9 +15053,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -15050,9 +15071,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -15068,9 +15089,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 
__lane_reverse_64_16); \ __ret; \ }) #endif @@ -15086,9 +15107,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -15104,9 +15125,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15122,9 +15143,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -15140,9 +15161,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15158,9 +15179,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -15176,9 +15197,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15194,9 +15215,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -15212,9 +15233,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15230,9 +15251,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15248,9 +15269,9 
@@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -15266,9 +15287,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15284,9 +15305,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -15302,9 +15323,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15325,9 +15346,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -15343,9 +15364,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -15361,9 +15382,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15379,9 +15400,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15402,9 +15423,9 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -15413,7 +15434,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x3_t __ret; \ poly8x8x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 4); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 4); \ __ret; \ }) #else 
@@ -15421,14 +15442,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x3_t __ret; \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -15437,7 +15458,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x3_t __ret; \ poly16x4x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 5); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 5); \ __ret; \ }) #else @@ -15445,14 +15466,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x3_t __ret; \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 
__lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -15461,7 +15482,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x3_t __ret; \ poly16x8x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 37); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 37); \ __ret; \ }) #else @@ -15469,14 +15490,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x3_t __ret; \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15485,7 +15506,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x3_t __ret; \ uint32x4x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 50); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 50); \ __ret; \ }) #else @@ -15493,14 +15514,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x3_t __ret; \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 
(int8x16_t)__rev1.val[2], __p2, 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15509,7 +15530,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x3_t __ret; \ uint16x8x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 49); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 49); \ __ret; \ }) #else @@ -15517,14 +15538,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x3_t __ret; \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15533,7 +15554,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __ret; \ float32x4x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, 
__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 41); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 41); \ __ret; \ }) #else @@ -15541,14 +15562,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x3_t __ret; \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15557,7 +15578,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __ret; \ int32x4x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 34); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 34); \ __ret; \ }) #else @@ -15565,14 +15586,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x3_t __ret; \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], 
__ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15581,7 +15602,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __ret; \ int16x8x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 33); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 33); \ __ret; \ }) #else @@ -15589,14 +15610,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x3_t __ret; \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15605,7 +15626,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x3_t __ret; \ uint8x8x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 16); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 16); \ __ret; \ }) #else @@ -15613,14 +15634,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x3_t __ret; \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -15629,7 +15650,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x3_t __ret; \ uint32x2x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 18); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 18); \ __ret; \ }) #else @@ -15637,14 +15658,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x3_t __ret; \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15653,7 +15674,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 
#define vld3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x3_t __ret; \ uint16x4x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 17); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 17); \ __ret; \ }) #else @@ -15661,14 +15682,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x3_t __ret; \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -15677,7 +15698,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x3_t __ret; \ int8x8x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 0); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 0); \ __ret; \ }) #else @@ -15685,14 +15706,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x3_t __ret; \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 
__builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -15701,7 +15722,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __ret; \ float32x2x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 9); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 9); \ __ret; \ }) #else @@ -15709,14 +15730,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x3_t __ret; \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15725,7 +15746,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __ret; \ int32x2x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 2); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 2); \ __ret; \ }) #else @@ -15733,14 +15754,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x3_t __ret; \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -15749,7 +15770,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __ret; \ int16x4x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 1); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 1); \ __ret; \ }) #else @@ -15757,14 +15778,14 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x3_t __ret; \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -15780,10 +15801,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 
4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -15799,10 +15820,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -15818,10 +15839,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -15837,10 +15858,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = 
__builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15856,10 +15877,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -15875,10 +15896,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15894,10 +15915,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15913,10 +15934,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -15932,10 +15953,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15951,10 +15972,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -15970,10 +15991,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -15989,10 +16010,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x4_t 
__ret; \ __builtin_neon_vld4_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -16008,10 +16029,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16032,10 +16053,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16051,10 +16072,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) 
#endif @@ -16070,10 +16091,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16089,10 +16110,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16113,10 +16134,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16132,10 +16153,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = 
__builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -16151,10 +16172,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16170,10 +16191,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -16189,10 +16210,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -16208,10 +16229,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -16227,10 +16248,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -16246,10 +16267,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -16265,10 +16286,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -16284,10 +16305,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x16x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 32); \ \ - __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -16303,10 +16324,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -16322,10 +16343,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -16341,10 +16362,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 
__lane_reverse_128_64); \ __ret; \ }) #endif @@ -16360,10 +16381,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -16379,10 +16400,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -16398,10 +16419,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16422,10 +16443,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + 
__ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16441,10 +16462,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -16460,10 +16481,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16479,10 +16500,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16503,10 +16524,10 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16515,7 +16536,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x4_t __ret; \ poly8x8x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 4); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 4); \ __ret; \ }) #else @@ -16523,16 +16544,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly8x8x4_t __ret; \ poly8x8x4_t __s1 = __p1; \ poly8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 4); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -16541,7 +16562,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x4_t __ret; \ poly16x4x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 5); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 
__p2, 5); \ __ret; \ }) #else @@ -16549,16 +16570,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x4x4_t __ret; \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 5); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16567,7 +16588,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x4_t __ret; \ poly16x8x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 37); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 37); \ __ret; \ }) #else @@ -16575,16 +16596,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 poly16x8x4_t __ret; \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 
__lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 37); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -16593,7 +16614,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x4_t __ret; \ uint32x4x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 50); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 50); \ __ret; \ }) #else @@ -16601,16 +16622,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x4x4_t __ret; \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 50); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], 
__ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -16619,7 +16640,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x4_t __ret; \ uint16x8x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 49); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 49); \ __ret; \ }) #else @@ -16627,16 +16648,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x8x4_t __ret; \ uint16x8x4_t __s1 = __p1; \ uint16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 49); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -16645,7 +16666,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __ret; \ float32x4x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 41); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), 
__builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 41); \ __ret; \ }) #else @@ -16653,16 +16674,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x4x4_t __ret; \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 41); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -16671,7 +16692,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __ret; \ int32x4x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 34); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 34); \ __ret; \ }) #else @@ -16679,16 +16700,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x4x4_t __ret; \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 
__lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 34); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_32); \ __ret; \ }) #endif @@ -16697,7 +16718,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __ret; \ int16x8x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 33); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 33); \ __ret; \ }) #else @@ -16705,16 +16726,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x8x4_t __ret; \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 33); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = 
__builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -16723,7 +16744,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x4_t __ret; \ uint8x8x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 16); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 16); \ __ret; \ }) #else @@ -16731,16 +16752,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint8x8x4_t __ret; \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 16); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -16749,7 +16770,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x4_t __ret; \ uint32x2x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 18); \ + 
__builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 18); \ __ret; \ }) #else @@ -16757,16 +16778,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint32x2x4_t __ret; \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 18); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16775,7 +16796,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x4_t __ret; \ uint16x4x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 17); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 17); \ __ret; \ }) #else @@ -16783,16 +16804,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 uint16x4x4_t __ret; \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 
__lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 17); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16801,7 +16822,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x4_t __ret; \ int8x8x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 0); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 0); \ __ret; \ }) #else @@ -16809,16 +16830,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int8x8x4_t __ret; \ int8x8x4_t __s1 = __p1; \ int8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 0); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 
4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ __ret; \ }) #endif @@ -16827,7 +16848,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __ret; \ float32x2x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 9); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 9); \ __ret; \ }) #else @@ -16835,16 +16856,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 float32x2x4_t __ret; \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 9); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16853,7 +16874,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __ret; \ int32x2x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 2); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 
__p2, 2); \ __ret; \ }) #else @@ -16861,16 +16882,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int32x2x4_t __ret; \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 2); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_32); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_32); \ __ret; \ }) #endif @@ -16879,7 +16900,7 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #define vld4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __ret; \ int16x4x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 1); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 1); \ __ret; \ }) #else @@ -16887,16 +16908,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 int16x4x4_t __ret; \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 1); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -16904,16 +16925,16 @@ __ai __attribute__((target("neon"))) int16x4_t vhsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -16921,16 +16942,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -16938,16 +16959,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vmaxq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -16955,16 +16976,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vmaxq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -16972,16 +16993,16 @@ __ai __attribute__((target("neon"))) int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, 
__p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -16989,16 +17010,16 @@ __ai __attribute__((target("neon"))) float32x4_t vmaxq_f32(float32x4_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17006,16 +17027,16 @@ __ai __attribute__((target("neon"))) int32x4_t vmaxq_s32(int32x4_t __p0, int32x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, 
__ret, __lane_reverse_128_16); return __ret; } #endif @@ -17023,16 +17044,16 @@ __ai __attribute__((target("neon"))) int16x8_t vmaxq_s16(int16x8_t __p0, int16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -17040,16 +17061,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17057,16 +17078,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vmax_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -17074,16 +17095,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vmax_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -17091,16 +17112,16 @@ __ai __attribute__((target("neon"))) int8x8_t vmax_s8(int8x8_t __p0, int8x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17108,16 +17129,16 @@ __ai __attribute__((target("neon"))) float32x2_t vmax_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 
2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17125,16 +17146,16 @@ __ai __attribute__((target("neon"))) int32x2_t vmax_s32(int32x2_t __p0, int32x2_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -17142,16 +17163,16 @@ __ai __attribute__((target("neon"))) int16x4_t vmax_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_8); return __ret; } #endif @@ -17159,16 +17180,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vminq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17176,16 +17197,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vminq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -17193,16 +17214,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vminq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -17210,16 +17231,16 @@ __ai __attribute__((target("neon"))) int8x16_t vminq_s8(int8x16_t __p0, int8x16_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17227,16 +17248,16 @@ __ai __attribute__((target("neon"))) float32x4_t vminq_f32(float32x4_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17244,16 +17265,16 @@ __ai __attribute__((target("neon"))) int32x4_t vminq_s32(int32x4_t __p0, int32x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vminq_s16(int16x8_t __p0, int16x8_t __p1) { 
int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -17261,16 +17282,16 @@ __ai __attribute__((target("neon"))) int16x8_t vminq_s16(int16x8_t __p0, int16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -17278,16 +17299,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vmin_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = 
__builtin_bit_cast(uint32x2_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17295,16 +17316,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vmin_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -17312,16 +17333,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vmin_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -17329,16 +17350,16 @@ __ai __attribute__((target("neon"))) int8x8_t vmin_s8(int8x8_t __p0, int8x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - 
float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17346,16 +17367,16 @@ __ai __attribute__((target("neon"))) float32x2_t vmin_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17363,16 +17384,16 @@ __ai __attribute__((target("neon"))) int32x2_t vmin_s32(int32x2_t __p0, int32x2_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -17386,11 +17407,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vmlaq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vmlaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 
7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -17404,11 +17425,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlaq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vmlaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17422,11 +17443,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlaq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vmlaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -17440,11 +17461,11 @@ __ai __attribute__((target("neon"))) int8x16_t vmlaq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vmlaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); 
__ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -17458,11 +17479,11 @@ __ai __attribute__((target("neon"))) float32x4_t vmlaq_f32(float32x4_t __p0, flo #else __ai __attribute__((target("neon"))) float32x4_t vmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17476,11 +17497,11 @@ __ai __attribute__((target("neon"))) int32x4_t vmlaq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vmlaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17494,11 +17515,11 @@ __ai __attribute__((target("neon"))) int16x8_t vmlaq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vmlaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -17512,11 +17533,11 @@ __ai __attribute__((target("neon"))) uint8x8_t vmla_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vmla_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = 
__builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -17530,11 +17551,11 @@ __ai __attribute__((target("neon"))) uint32x2_t vmla_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vmla_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17548,11 +17569,11 @@ __ai __attribute__((target("neon"))) uint16x4_t vmla_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vmla_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -17566,11 +17587,11 @@ __ai __attribute__((target("neon"))) int8x8_t vmla_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vmla_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -17584,11 +17605,11 @@ __ai __attribute__((target("neon"))) float32x2_t vmla_f32(float32x2_t __p0, floa #else __ai __attribute__((target("neon"))) float32x2_t vmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { 
float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17602,11 +17623,11 @@ __ai __attribute__((target("neon"))) int32x2_t vmla_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vmla_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17620,11 +17641,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -17644,11 +17665,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ uint32x4_t __s0_55 = __p0_55; \ uint32x4_t __s1_55 = __p1_55; \ uint32x2_t __s2_55 = __p2_55; \ - uint32x4_t __rev0_55; __rev0_55 = __builtin_shufflevector(__s0_55, __s0_55, 3, 2, 1, 0); \ - uint32x4_t __rev1_55; __rev1_55 = __builtin_shufflevector(__s1_55, __s1_55, 3, 2, 1, 0); \ - uint32x2_t __rev2_55; __rev2_55 = __builtin_shufflevector(__s2_55, __s2_55, 1, 0); \ + uint32x4_t __rev0_55; __rev0_55 = __builtin_shufflevector(__s0_55, __s0_55, __lane_reverse_128_32); \ + uint32x4_t __rev1_55; __rev1_55 = __builtin_shufflevector(__s1_55, __s1_55, __lane_reverse_128_32); \ + uint32x2_t __rev2_55; __rev2_55 = __builtin_shufflevector(__s2_55, __s2_55, __lane_reverse_64_32); \ __ret_55 = __rev0_55 + __rev1_55 * __noswap_splatq_lane_u32(__rev2_55, __p3_55); \ - __ret_55 = __builtin_shufflevector(__ret_55, __ret_55, 3, 2, 1, 0); \ + __ret_55 = __builtin_shufflevector(__ret_55, __ret_55, __lane_reverse_128_32); \ __ret_55; \ 
}) #endif @@ -17668,11 +17689,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ uint16x8_t __s0_57 = __p0_57; \ uint16x8_t __s1_57 = __p1_57; \ uint16x4_t __s2_57 = __p2_57; \ - uint16x8_t __rev0_57; __rev0_57 = __builtin_shufflevector(__s0_57, __s0_57, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_57; __rev1_57 = __builtin_shufflevector(__s1_57, __s1_57, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_57; __rev2_57 = __builtin_shufflevector(__s2_57, __s2_57, 3, 2, 1, 0); \ + uint16x8_t __rev0_57; __rev0_57 = __builtin_shufflevector(__s0_57, __s0_57, __lane_reverse_128_16); \ + uint16x8_t __rev1_57; __rev1_57 = __builtin_shufflevector(__s1_57, __s1_57, __lane_reverse_128_16); \ + uint16x4_t __rev2_57; __rev2_57 = __builtin_shufflevector(__s2_57, __s2_57, __lane_reverse_64_16); \ __ret_57 = __rev0_57 + __rev1_57 * __noswap_splatq_lane_u16(__rev2_57, __p3_57); \ - __ret_57 = __builtin_shufflevector(__ret_57, __ret_57, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_57 = __builtin_shufflevector(__ret_57, __ret_57, __lane_reverse_128_16); \ __ret_57; \ }) #endif @@ -17692,11 +17713,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ float32x4_t __s0_59 = __p0_59; \ float32x4_t __s1_59 = __p1_59; \ float32x2_t __s2_59 = __p2_59; \ - float32x4_t __rev0_59; __rev0_59 = __builtin_shufflevector(__s0_59, __s0_59, 3, 2, 1, 0); \ - float32x4_t __rev1_59; __rev1_59 = __builtin_shufflevector(__s1_59, __s1_59, 3, 2, 1, 0); \ - float32x2_t __rev2_59; __rev2_59 = __builtin_shufflevector(__s2_59, __s2_59, 1, 0); \ + float32x4_t __rev0_59; __rev0_59 = __builtin_shufflevector(__s0_59, __s0_59, __lane_reverse_128_32); \ + float32x4_t __rev1_59; __rev1_59 = __builtin_shufflevector(__s1_59, __s1_59, __lane_reverse_128_32); \ + float32x2_t __rev2_59; __rev2_59 = __builtin_shufflevector(__s2_59, __s2_59, __lane_reverse_64_32); \ __ret_59 = __rev0_59 + __rev1_59 * __noswap_splatq_lane_f32(__rev2_59, __p3_59); \ - __ret_59 = __builtin_shufflevector(__ret_59, __ret_59, 3, 2, 1, 0); \ + __ret_59 = __builtin_shufflevector(__ret_59, __ret_59, __lane_reverse_128_32); \ __ret_59; \ }) #endif @@ -17716,11 +17737,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ int32x4_t __s0_61 = __p0_61; \ int32x4_t __s1_61 = __p1_61; \ int32x2_t __s2_61 = __p2_61; \ - int32x4_t __rev0_61; __rev0_61 = __builtin_shufflevector(__s0_61, __s0_61, 3, 2, 1, 0); \ - int32x4_t __rev1_61; __rev1_61 = __builtin_shufflevector(__s1_61, __s1_61, 3, 2, 1, 0); \ - int32x2_t __rev2_61; __rev2_61 = __builtin_shufflevector(__s2_61, __s2_61, 1, 0); \ + int32x4_t __rev0_61; __rev0_61 = __builtin_shufflevector(__s0_61, __s0_61, __lane_reverse_128_32); \ + int32x4_t __rev1_61; __rev1_61 = __builtin_shufflevector(__s1_61, __s1_61, __lane_reverse_128_32); \ + int32x2_t __rev2_61; __rev2_61 = __builtin_shufflevector(__s2_61, __s2_61, __lane_reverse_64_32); \ __ret_61 = __rev0_61 + __rev1_61 * __noswap_splatq_lane_s32(__rev2_61, __p3_61); \ - __ret_61 = __builtin_shufflevector(__ret_61, __ret_61, 3, 2, 1, 0); \ + __ret_61 = __builtin_shufflevector(__ret_61, __ret_61, __lane_reverse_128_32); \ __ret_61; \ }) #endif @@ -17740,11 +17761,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ int16x8_t __s0_63 = __p0_63; \ int16x8_t __s1_63 = __p1_63; \ int16x4_t __s2_63 = __p2_63; \ - int16x8_t __rev0_63; __rev0_63 = __builtin_shufflevector(__s0_63, __s0_63, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_63; __rev1_63 = 
__builtin_shufflevector(__s1_63, __s1_63, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_63; __rev2_63 = __builtin_shufflevector(__s2_63, __s2_63, 3, 2, 1, 0); \ + int16x8_t __rev0_63; __rev0_63 = __builtin_shufflevector(__s0_63, __s0_63, __lane_reverse_128_16); \ + int16x8_t __rev1_63; __rev1_63 = __builtin_shufflevector(__s1_63, __s1_63, __lane_reverse_128_16); \ + int16x4_t __rev2_63; __rev2_63 = __builtin_shufflevector(__s2_63, __s2_63, __lane_reverse_64_16); \ __ret_63 = __rev0_63 + __rev1_63 * __noswap_splatq_lane_s16(__rev2_63, __p3_63); \ - __ret_63 = __builtin_shufflevector(__ret_63, __ret_63, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_63 = __builtin_shufflevector(__ret_63, __ret_63, __lane_reverse_128_16); \ __ret_63; \ }) #endif @@ -17764,11 +17785,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ uint32x2_t __s0_65 = __p0_65; \ uint32x2_t __s1_65 = __p1_65; \ uint32x2_t __s2_65 = __p2_65; \ - uint32x2_t __rev0_65; __rev0_65 = __builtin_shufflevector(__s0_65, __s0_65, 1, 0); \ - uint32x2_t __rev1_65; __rev1_65 = __builtin_shufflevector(__s1_65, __s1_65, 1, 0); \ - uint32x2_t __rev2_65; __rev2_65 = __builtin_shufflevector(__s2_65, __s2_65, 1, 0); \ + uint32x2_t __rev0_65; __rev0_65 = __builtin_shufflevector(__s0_65, __s0_65, __lane_reverse_64_32); \ + uint32x2_t __rev1_65; __rev1_65 = __builtin_shufflevector(__s1_65, __s1_65, __lane_reverse_64_32); \ + uint32x2_t __rev2_65; __rev2_65 = __builtin_shufflevector(__s2_65, __s2_65, __lane_reverse_64_32); \ __ret_65 = __rev0_65 + __rev1_65 * __noswap_splat_lane_u32(__rev2_65, __p3_65); \ - __ret_65 = __builtin_shufflevector(__ret_65, __ret_65, 1, 0); \ + __ret_65 = __builtin_shufflevector(__ret_65, __ret_65, __lane_reverse_64_32); \ __ret_65; \ }) #endif @@ -17788,11 +17809,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ uint16x4_t __s0_67 = __p0_67; \ uint16x4_t __s1_67 = __p1_67; \ uint16x4_t __s2_67 = __p2_67; \ - uint16x4_t __rev0_67; __rev0_67 = __builtin_shufflevector(__s0_67, __s0_67, 3, 2, 1, 0); \ - uint16x4_t __rev1_67; __rev1_67 = __builtin_shufflevector(__s1_67, __s1_67, 3, 2, 1, 0); \ - uint16x4_t __rev2_67; __rev2_67 = __builtin_shufflevector(__s2_67, __s2_67, 3, 2, 1, 0); \ + uint16x4_t __rev0_67; __rev0_67 = __builtin_shufflevector(__s0_67, __s0_67, __lane_reverse_64_16); \ + uint16x4_t __rev1_67; __rev1_67 = __builtin_shufflevector(__s1_67, __s1_67, __lane_reverse_64_16); \ + uint16x4_t __rev2_67; __rev2_67 = __builtin_shufflevector(__s2_67, __s2_67, __lane_reverse_64_16); \ __ret_67 = __rev0_67 + __rev1_67 * __noswap_splat_lane_u16(__rev2_67, __p3_67); \ - __ret_67 = __builtin_shufflevector(__ret_67, __ret_67, 3, 2, 1, 0); \ + __ret_67 = __builtin_shufflevector(__ret_67, __ret_67, __lane_reverse_64_16); \ __ret_67; \ }) #endif @@ -17812,11 +17833,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ float32x2_t __s0_69 = __p0_69; \ float32x2_t __s1_69 = __p1_69; \ float32x2_t __s2_69 = __p2_69; \ - float32x2_t __rev0_69; __rev0_69 = __builtin_shufflevector(__s0_69, __s0_69, 1, 0); \ - float32x2_t __rev1_69; __rev1_69 = __builtin_shufflevector(__s1_69, __s1_69, 1, 0); \ - float32x2_t __rev2_69; __rev2_69 = __builtin_shufflevector(__s2_69, __s2_69, 1, 0); \ + float32x2_t __rev0_69; __rev0_69 = __builtin_shufflevector(__s0_69, __s0_69, __lane_reverse_64_32); \ + float32x2_t __rev1_69; __rev1_69 = __builtin_shufflevector(__s1_69, __s1_69, __lane_reverse_64_32); \ + float32x2_t __rev2_69; __rev2_69 = 
__builtin_shufflevector(__s2_69, __s2_69, __lane_reverse_64_32); \ __ret_69 = __rev0_69 + __rev1_69 * __noswap_splat_lane_f32(__rev2_69, __p3_69); \ - __ret_69 = __builtin_shufflevector(__ret_69, __ret_69, 1, 0); \ + __ret_69 = __builtin_shufflevector(__ret_69, __ret_69, __lane_reverse_64_32); \ __ret_69; \ }) #endif @@ -17836,11 +17857,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ int32x2_t __s0_71 = __p0_71; \ int32x2_t __s1_71 = __p1_71; \ int32x2_t __s2_71 = __p2_71; \ - int32x2_t __rev0_71; __rev0_71 = __builtin_shufflevector(__s0_71, __s0_71, 1, 0); \ - int32x2_t __rev1_71; __rev1_71 = __builtin_shufflevector(__s1_71, __s1_71, 1, 0); \ - int32x2_t __rev2_71; __rev2_71 = __builtin_shufflevector(__s2_71, __s2_71, 1, 0); \ + int32x2_t __rev0_71; __rev0_71 = __builtin_shufflevector(__s0_71, __s0_71, __lane_reverse_64_32); \ + int32x2_t __rev1_71; __rev1_71 = __builtin_shufflevector(__s1_71, __s1_71, __lane_reverse_64_32); \ + int32x2_t __rev2_71; __rev2_71 = __builtin_shufflevector(__s2_71, __s2_71, __lane_reverse_64_32); \ __ret_71 = __rev0_71 + __rev1_71 * __noswap_splat_lane_s32(__rev2_71, __p3_71); \ - __ret_71 = __builtin_shufflevector(__ret_71, __ret_71, 1, 0); \ + __ret_71 = __builtin_shufflevector(__ret_71, __ret_71, __lane_reverse_64_32); \ __ret_71; \ }) #endif @@ -17860,11 +17881,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_s16(int16x4_t __p0, int16x4_ int16x4_t __s0_73 = __p0_73; \ int16x4_t __s1_73 = __p1_73; \ int16x4_t __s2_73 = __p2_73; \ - int16x4_t __rev0_73; __rev0_73 = __builtin_shufflevector(__s0_73, __s0_73, 3, 2, 1, 0); \ - int16x4_t __rev1_73; __rev1_73 = __builtin_shufflevector(__s1_73, __s1_73, 3, 2, 1, 0); \ - int16x4_t __rev2_73; __rev2_73 = __builtin_shufflevector(__s2_73, __s2_73, 3, 2, 1, 0); \ + int16x4_t __rev0_73; __rev0_73 = __builtin_shufflevector(__s0_73, __s0_73, __lane_reverse_64_16); \ + int16x4_t __rev1_73; __rev1_73 = __builtin_shufflevector(__s1_73, __s1_73, __lane_reverse_64_16); \ + int16x4_t __rev2_73; __rev2_73 = __builtin_shufflevector(__s2_73, __s2_73, __lane_reverse_64_16); \ __ret_73 = __rev0_73 + __rev1_73 * __noswap_splat_lane_s16(__rev2_73, __p3_73); \ - __ret_73 = __builtin_shufflevector(__ret_73, __ret_73, 3, 2, 1, 0); \ + __ret_73 = __builtin_shufflevector(__ret_73, __ret_73, __lane_reverse_64_16); \ __ret_73; \ }) #endif @@ -17878,10 +17899,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlaq_n_u32(uint32x4_t __p0, uin #else __ai __attribute__((target("neon"))) uint32x4_t vmlaq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __rev1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17895,10 +17916,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlaq_n_u16(uint16x8_t __p0, uin #else __ai __attribute__((target("neon"))) uint16x8_t vmlaq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 + __rev1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -17912,10 +17933,10 @@ __ai __attribute__((target("neon"))) float32x4_t vmlaq_n_f32(float32x4_t __p0, f #else __ai __attribute__((target("neon"))) float32x4_t vmlaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __rev1 * (float32x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17929,10 +17950,10 @@ __ai __attribute__((target("neon"))) int32x4_t vmlaq_n_s32(int32x4_t __p0, int32 #else __ai __attribute__((target("neon"))) int32x4_t vmlaq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __rev1 * (int32x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -17946,10 +17967,10 @@ __ai __attribute__((target("neon"))) int16x8_t vmlaq_n_s16(int16x8_t __p0, int16 #else __ai __attribute__((target("neon"))) int16x8_t vmlaq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 + __rev1 * (int16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -17963,10 +17984,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vmla_n_u32(uint32x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x2_t vmla_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __rev1 * (uint32x2_t) 
{__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -17980,10 +18001,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vmla_n_u16(uint16x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x4_t vmla_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __rev1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -17997,10 +18018,10 @@ __ai __attribute__((target("neon"))) float32x2_t vmla_n_f32(float32x2_t __p0, fl #else __ai __attribute__((target("neon"))) float32x2_t vmla_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __rev1 * (float32x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18014,10 +18035,10 @@ __ai __attribute__((target("neon"))) int32x2_t vmla_n_s32(int32x2_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x2_t vmla_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __rev1 * (int32x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18031,10 +18052,10 @@ __ai __attribute__((target("neon"))) int16x4_t vmla_n_s16(int16x4_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x4_t vmla_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __rev1 * (int16x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -18048,11 +18069,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vmlsq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vmlsq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -18066,11 +18087,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlsq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vmlsq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18084,11 +18105,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlsq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vmlsq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -18102,11 +18123,11 @@ __ai __attribute__((target("neon"))) int8x16_t vmlsq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vmlsq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; 
__rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -18120,11 +18141,11 @@ __ai __attribute__((target("neon"))) float32x4_t vmlsq_f32(float32x4_t __p0, flo #else __ai __attribute__((target("neon"))) float32x4_t vmlsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18138,11 +18159,11 @@ __ai __attribute__((target("neon"))) int32x4_t vmlsq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vmlsq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18156,11 +18177,11 @@ __ai __attribute__((target("neon"))) int16x8_t vmlsq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vmlsq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -18174,11 +18195,11 @@ __ai __attribute__((target("neon"))) uint8x8_t vmls_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vmls_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 
3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -18192,11 +18213,11 @@ __ai __attribute__((target("neon"))) uint32x2_t vmls_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vmls_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18210,11 +18231,11 @@ __ai __attribute__((target("neon"))) uint16x4_t vmls_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vmls_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -18228,11 +18249,11 @@ __ai __attribute__((target("neon"))) int8x8_t vmls_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vmls_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -18246,11 +18267,11 @@ __ai __attribute__((target("neon"))) float32x2_t vmls_f32(float32x2_t __p0, floa #else __ai __attribute__((target("neon"))) float32x2_t vmls_f32(float32x2_t __p0, 
float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18264,11 +18285,11 @@ __ai __attribute__((target("neon"))) int32x2_t vmls_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vmls_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18282,11 +18303,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -18306,11 +18327,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ uint32x4_t __s0_75 = __p0_75; \ uint32x4_t __s1_75 = __p1_75; \ uint32x2_t __s2_75 = __p2_75; \ - uint32x4_t __rev0_75; __rev0_75 = __builtin_shufflevector(__s0_75, __s0_75, 3, 2, 1, 0); \ - uint32x4_t __rev1_75; __rev1_75 = __builtin_shufflevector(__s1_75, __s1_75, 3, 2, 1, 0); \ - uint32x2_t __rev2_75; __rev2_75 = __builtin_shufflevector(__s2_75, __s2_75, 1, 0); \ + uint32x4_t __rev0_75; __rev0_75 = __builtin_shufflevector(__s0_75, __s0_75, __lane_reverse_128_32); \ + uint32x4_t __rev1_75; __rev1_75 = __builtin_shufflevector(__s1_75, __s1_75, __lane_reverse_128_32); \ + uint32x2_t __rev2_75; __rev2_75 = __builtin_shufflevector(__s2_75, __s2_75, __lane_reverse_64_32); \ __ret_75 = __rev0_75 - __rev1_75 * __noswap_splatq_lane_u32(__rev2_75, __p3_75); \ - __ret_75 = __builtin_shufflevector(__ret_75, __ret_75, 3, 2, 1, 0); \ + __ret_75 = __builtin_shufflevector(__ret_75, __ret_75, 
__lane_reverse_128_32); \ __ret_75; \ }) #endif @@ -18330,11 +18351,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ uint16x8_t __s0_77 = __p0_77; \ uint16x8_t __s1_77 = __p1_77; \ uint16x4_t __s2_77 = __p2_77; \ - uint16x8_t __rev0_77; __rev0_77 = __builtin_shufflevector(__s0_77, __s0_77, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_77; __rev1_77 = __builtin_shufflevector(__s1_77, __s1_77, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_77; __rev2_77 = __builtin_shufflevector(__s2_77, __s2_77, 3, 2, 1, 0); \ + uint16x8_t __rev0_77; __rev0_77 = __builtin_shufflevector(__s0_77, __s0_77, __lane_reverse_128_16); \ + uint16x8_t __rev1_77; __rev1_77 = __builtin_shufflevector(__s1_77, __s1_77, __lane_reverse_128_16); \ + uint16x4_t __rev2_77; __rev2_77 = __builtin_shufflevector(__s2_77, __s2_77, __lane_reverse_64_16); \ __ret_77 = __rev0_77 - __rev1_77 * __noswap_splatq_lane_u16(__rev2_77, __p3_77); \ - __ret_77 = __builtin_shufflevector(__ret_77, __ret_77, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_77 = __builtin_shufflevector(__ret_77, __ret_77, __lane_reverse_128_16); \ __ret_77; \ }) #endif @@ -18354,11 +18375,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ float32x4_t __s0_79 = __p0_79; \ float32x4_t __s1_79 = __p1_79; \ float32x2_t __s2_79 = __p2_79; \ - float32x4_t __rev0_79; __rev0_79 = __builtin_shufflevector(__s0_79, __s0_79, 3, 2, 1, 0); \ - float32x4_t __rev1_79; __rev1_79 = __builtin_shufflevector(__s1_79, __s1_79, 3, 2, 1, 0); \ - float32x2_t __rev2_79; __rev2_79 = __builtin_shufflevector(__s2_79, __s2_79, 1, 0); \ + float32x4_t __rev0_79; __rev0_79 = __builtin_shufflevector(__s0_79, __s0_79, __lane_reverse_128_32); \ + float32x4_t __rev1_79; __rev1_79 = __builtin_shufflevector(__s1_79, __s1_79, __lane_reverse_128_32); \ + float32x2_t __rev2_79; __rev2_79 = __builtin_shufflevector(__s2_79, __s2_79, __lane_reverse_64_32); \ __ret_79 = __rev0_79 - __rev1_79 * __noswap_splatq_lane_f32(__rev2_79, __p3_79); \ - __ret_79 = __builtin_shufflevector(__ret_79, __ret_79, 3, 2, 1, 0); \ + __ret_79 = __builtin_shufflevector(__ret_79, __ret_79, __lane_reverse_128_32); \ __ret_79; \ }) #endif @@ -18378,11 +18399,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ int32x4_t __s0_81 = __p0_81; \ int32x4_t __s1_81 = __p1_81; \ int32x2_t __s2_81 = __p2_81; \ - int32x4_t __rev0_81; __rev0_81 = __builtin_shufflevector(__s0_81, __s0_81, 3, 2, 1, 0); \ - int32x4_t __rev1_81; __rev1_81 = __builtin_shufflevector(__s1_81, __s1_81, 3, 2, 1, 0); \ - int32x2_t __rev2_81; __rev2_81 = __builtin_shufflevector(__s2_81, __s2_81, 1, 0); \ + int32x4_t __rev0_81; __rev0_81 = __builtin_shufflevector(__s0_81, __s0_81, __lane_reverse_128_32); \ + int32x4_t __rev1_81; __rev1_81 = __builtin_shufflevector(__s1_81, __s1_81, __lane_reverse_128_32); \ + int32x2_t __rev2_81; __rev2_81 = __builtin_shufflevector(__s2_81, __s2_81, __lane_reverse_64_32); \ __ret_81 = __rev0_81 - __rev1_81 * __noswap_splatq_lane_s32(__rev2_81, __p3_81); \ - __ret_81 = __builtin_shufflevector(__ret_81, __ret_81, 3, 2, 1, 0); \ + __ret_81 = __builtin_shufflevector(__ret_81, __ret_81, __lane_reverse_128_32); \ __ret_81; \ }) #endif @@ -18402,11 +18423,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ int16x8_t __s0_83 = __p0_83; \ int16x8_t __s1_83 = __p1_83; \ int16x4_t __s2_83 = __p2_83; \ - int16x8_t __rev0_83; __rev0_83 = __builtin_shufflevector(__s0_83, __s0_83, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t 
__rev1_83; __rev1_83 = __builtin_shufflevector(__s1_83, __s1_83, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_83; __rev2_83 = __builtin_shufflevector(__s2_83, __s2_83, 3, 2, 1, 0); \ + int16x8_t __rev0_83; __rev0_83 = __builtin_shufflevector(__s0_83, __s0_83, __lane_reverse_128_16); \ + int16x8_t __rev1_83; __rev1_83 = __builtin_shufflevector(__s1_83, __s1_83, __lane_reverse_128_16); \ + int16x4_t __rev2_83; __rev2_83 = __builtin_shufflevector(__s2_83, __s2_83, __lane_reverse_64_16); \ __ret_83 = __rev0_83 - __rev1_83 * __noswap_splatq_lane_s16(__rev2_83, __p3_83); \ - __ret_83 = __builtin_shufflevector(__ret_83, __ret_83, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_83 = __builtin_shufflevector(__ret_83, __ret_83, __lane_reverse_128_16); \ __ret_83; \ }) #endif @@ -18426,11 +18447,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ uint32x2_t __s0_85 = __p0_85; \ uint32x2_t __s1_85 = __p1_85; \ uint32x2_t __s2_85 = __p2_85; \ - uint32x2_t __rev0_85; __rev0_85 = __builtin_shufflevector(__s0_85, __s0_85, 1, 0); \ - uint32x2_t __rev1_85; __rev1_85 = __builtin_shufflevector(__s1_85, __s1_85, 1, 0); \ - uint32x2_t __rev2_85; __rev2_85 = __builtin_shufflevector(__s2_85, __s2_85, 1, 0); \ + uint32x2_t __rev0_85; __rev0_85 = __builtin_shufflevector(__s0_85, __s0_85, __lane_reverse_64_32); \ + uint32x2_t __rev1_85; __rev1_85 = __builtin_shufflevector(__s1_85, __s1_85, __lane_reverse_64_32); \ + uint32x2_t __rev2_85; __rev2_85 = __builtin_shufflevector(__s2_85, __s2_85, __lane_reverse_64_32); \ __ret_85 = __rev0_85 - __rev1_85 * __noswap_splat_lane_u32(__rev2_85, __p3_85); \ - __ret_85 = __builtin_shufflevector(__ret_85, __ret_85, 1, 0); \ + __ret_85 = __builtin_shufflevector(__ret_85, __ret_85, __lane_reverse_64_32); \ __ret_85; \ }) #endif @@ -18450,11 +18471,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ uint16x4_t __s0_87 = __p0_87; \ uint16x4_t __s1_87 = __p1_87; \ uint16x4_t __s2_87 = __p2_87; \ - uint16x4_t __rev0_87; __rev0_87 = __builtin_shufflevector(__s0_87, __s0_87, 3, 2, 1, 0); \ - uint16x4_t __rev1_87; __rev1_87 = __builtin_shufflevector(__s1_87, __s1_87, 3, 2, 1, 0); \ - uint16x4_t __rev2_87; __rev2_87 = __builtin_shufflevector(__s2_87, __s2_87, 3, 2, 1, 0); \ + uint16x4_t __rev0_87; __rev0_87 = __builtin_shufflevector(__s0_87, __s0_87, __lane_reverse_64_16); \ + uint16x4_t __rev1_87; __rev1_87 = __builtin_shufflevector(__s1_87, __s1_87, __lane_reverse_64_16); \ + uint16x4_t __rev2_87; __rev2_87 = __builtin_shufflevector(__s2_87, __s2_87, __lane_reverse_64_16); \ __ret_87 = __rev0_87 - __rev1_87 * __noswap_splat_lane_u16(__rev2_87, __p3_87); \ - __ret_87 = __builtin_shufflevector(__ret_87, __ret_87, 3, 2, 1, 0); \ + __ret_87 = __builtin_shufflevector(__ret_87, __ret_87, __lane_reverse_64_16); \ __ret_87; \ }) #endif @@ -18474,11 +18495,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ float32x2_t __s0_89 = __p0_89; \ float32x2_t __s1_89 = __p1_89; \ float32x2_t __s2_89 = __p2_89; \ - float32x2_t __rev0_89; __rev0_89 = __builtin_shufflevector(__s0_89, __s0_89, 1, 0); \ - float32x2_t __rev1_89; __rev1_89 = __builtin_shufflevector(__s1_89, __s1_89, 1, 0); \ - float32x2_t __rev2_89; __rev2_89 = __builtin_shufflevector(__s2_89, __s2_89, 1, 0); \ + float32x2_t __rev0_89; __rev0_89 = __builtin_shufflevector(__s0_89, __s0_89, __lane_reverse_64_32); \ + float32x2_t __rev1_89; __rev1_89 = __builtin_shufflevector(__s1_89, __s1_89, __lane_reverse_64_32); \ + float32x2_t __rev2_89; __rev2_89 = 
__builtin_shufflevector(__s2_89, __s2_89, __lane_reverse_64_32); \ __ret_89 = __rev0_89 - __rev1_89 * __noswap_splat_lane_f32(__rev2_89, __p3_89); \ - __ret_89 = __builtin_shufflevector(__ret_89, __ret_89, 1, 0); \ + __ret_89 = __builtin_shufflevector(__ret_89, __ret_89, __lane_reverse_64_32); \ __ret_89; \ }) #endif @@ -18498,11 +18519,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ int32x2_t __s0_91 = __p0_91; \ int32x2_t __s1_91 = __p1_91; \ int32x2_t __s2_91 = __p2_91; \ - int32x2_t __rev0_91; __rev0_91 = __builtin_shufflevector(__s0_91, __s0_91, 1, 0); \ - int32x2_t __rev1_91; __rev1_91 = __builtin_shufflevector(__s1_91, __s1_91, 1, 0); \ - int32x2_t __rev2_91; __rev2_91 = __builtin_shufflevector(__s2_91, __s2_91, 1, 0); \ + int32x2_t __rev0_91; __rev0_91 = __builtin_shufflevector(__s0_91, __s0_91, __lane_reverse_64_32); \ + int32x2_t __rev1_91; __rev1_91 = __builtin_shufflevector(__s1_91, __s1_91, __lane_reverse_64_32); \ + int32x2_t __rev2_91; __rev2_91 = __builtin_shufflevector(__s2_91, __s2_91, __lane_reverse_64_32); \ __ret_91 = __rev0_91 - __rev1_91 * __noswap_splat_lane_s32(__rev2_91, __p3_91); \ - __ret_91 = __builtin_shufflevector(__ret_91, __ret_91, 1, 0); \ + __ret_91 = __builtin_shufflevector(__ret_91, __ret_91, __lane_reverse_64_32); \ __ret_91; \ }) #endif @@ -18522,11 +18543,11 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_s16(int16x4_t __p0, int16x4_ int16x4_t __s0_93 = __p0_93; \ int16x4_t __s1_93 = __p1_93; \ int16x4_t __s2_93 = __p2_93; \ - int16x4_t __rev0_93; __rev0_93 = __builtin_shufflevector(__s0_93, __s0_93, 3, 2, 1, 0); \ - int16x4_t __rev1_93; __rev1_93 = __builtin_shufflevector(__s1_93, __s1_93, 3, 2, 1, 0); \ - int16x4_t __rev2_93; __rev2_93 = __builtin_shufflevector(__s2_93, __s2_93, 3, 2, 1, 0); \ + int16x4_t __rev0_93; __rev0_93 = __builtin_shufflevector(__s0_93, __s0_93, __lane_reverse_64_16); \ + int16x4_t __rev1_93; __rev1_93 = __builtin_shufflevector(__s1_93, __s1_93, __lane_reverse_64_16); \ + int16x4_t __rev2_93; __rev2_93 = __builtin_shufflevector(__s2_93, __s2_93, __lane_reverse_64_16); \ __ret_93 = __rev0_93 - __rev1_93 * __noswap_splat_lane_s16(__rev2_93, __p3_93); \ - __ret_93 = __builtin_shufflevector(__ret_93, __ret_93, 3, 2, 1, 0); \ + __ret_93 = __builtin_shufflevector(__ret_93, __ret_93, __lane_reverse_64_16); \ __ret_93; \ }) #endif @@ -18540,10 +18561,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlsq_n_u32(uint32x4_t __p0, uin #else __ai __attribute__((target("neon"))) uint32x4_t vmlsq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 - __rev1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18557,10 +18578,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlsq_n_u16(uint16x8_t __p0, uin #else __ai __attribute__((target("neon"))) uint16x8_t vmlsq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 - __rev1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -18574,10 +18595,10 @@ __ai __attribute__((target("neon"))) float32x4_t vmlsq_n_f32(float32x4_t __p0, f #else __ai __attribute__((target("neon"))) float32x4_t vmlsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 - __rev1 * (float32x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18591,10 +18612,10 @@ __ai __attribute__((target("neon"))) int32x4_t vmlsq_n_s32(int32x4_t __p0, int32 #else __ai __attribute__((target("neon"))) int32x4_t vmlsq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 - __rev1 * (int32x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18608,10 +18629,10 @@ __ai __attribute__((target("neon"))) int16x8_t vmlsq_n_s16(int16x8_t __p0, int16 #else __ai __attribute__((target("neon"))) int16x8_t vmlsq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 - __rev1 * (int16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -18625,10 +18646,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vmls_n_u32(uint32x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x2_t vmls_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __rev1 * (uint32x2_t) 
{__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18642,10 +18663,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vmls_n_u16(uint16x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x4_t vmls_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __rev1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -18659,10 +18680,10 @@ __ai __attribute__((target("neon"))) float32x2_t vmls_n_f32(float32x2_t __p0, fl #else __ai __attribute__((target("neon"))) float32x2_t vmls_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __rev1 * (float32x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18676,10 +18697,10 @@ __ai __attribute__((target("neon"))) int32x2_t vmls_n_s32(int32x2_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x2_t vmls_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __rev1 * (int32x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18693,10 +18714,10 @@ __ai __attribute__((target("neon"))) int16x4_t vmls_n_s16(int16x4_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x4_t vmls_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __rev1 * (int16x4_t) {__p2, __p2, __p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -18711,7 +18732,7 @@ __ai __attribute__((target("neon"))) poly8x8_t vmov_n_p8(poly8_t __p0) { __ai __attribute__((target("neon"))) poly8x8_t vmov_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -18726,7 +18747,7 @@ __ai __attribute__((target("neon"))) poly16x4_t vmov_n_p16(poly16_t __p0) { __ai __attribute__((target("neon"))) poly16x4_t vmov_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -18741,7 +18762,7 @@ __ai __attribute__((target("neon"))) poly8x16_t vmovq_n_p8(poly8_t __p0) { __ai __attribute__((target("neon"))) poly8x16_t vmovq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -18756,7 +18777,7 @@ __ai __attribute__((target("neon"))) poly16x8_t vmovq_n_p16(poly16_t __p0) { __ai __attribute__((target("neon"))) poly16x8_t vmovq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -18771,7 +18792,7 @@ __ai __attribute__((target("neon"))) uint8x16_t vmovq_n_u8(uint8_t __p0) { __ai __attribute__((target("neon"))) uint8x16_t vmovq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -18786,7 +18807,7 @@ __ai __attribute__((target("neon"))) uint32x4_t vmovq_n_u32(uint32_t __p0) { __ai __attribute__((target("neon"))) uint32x4_t vmovq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18801,7 +18822,7 @@ __ai __attribute__((target("neon"))) uint64x2_t vmovq_n_u64(uint64_t __p0) { __ai __attribute__((target("neon"))) uint64x2_t vmovq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -18816,7 +18837,7 @@ __ai __attribute__((target("neon"))) uint16x8_t vmovq_n_u16(uint16_t __p0) { __ai __attribute__((target("neon"))) uint16x8_t vmovq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -18831,7 +18852,7 @@ __ai __attribute__((target("neon"))) int8x16_t vmovq_n_s8(int8_t __p0) { __ai __attribute__((target("neon"))) int8x16_t vmovq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = 
__builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -18846,7 +18867,7 @@ __ai __attribute__((target("neon"))) float32x4_t vmovq_n_f32(float32_t __p0) { __ai __attribute__((target("neon"))) float32x4_t vmovq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18863,7 +18884,7 @@ __ai __attribute__((target("neon"))) float32x4_t vmovq_n_f32(float32_t __p0) { float16x8_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x8_t) {__s0, __s0, __s0, __s0, __s0, __s0, __s0, __s0}; \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -18878,7 +18899,7 @@ __ai __attribute__((target("neon"))) int32x4_t vmovq_n_s32(int32_t __p0) { __ai __attribute__((target("neon"))) int32x4_t vmovq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -18893,7 +18914,7 @@ __ai __attribute__((target("neon"))) int64x2_t vmovq_n_s64(int64_t __p0) { __ai __attribute__((target("neon"))) int64x2_t vmovq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -18908,7 +18929,7 @@ __ai __attribute__((target("neon"))) int16x8_t vmovq_n_s16(int16_t __p0) { __ai __attribute__((target("neon"))) int16x8_t vmovq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -18923,7 +18944,7 @@ __ai __attribute__((target("neon"))) uint8x8_t vmov_n_u8(uint8_t __p0) { __ai __attribute__((target("neon"))) uint8x8_t vmov_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -18938,7 +18959,7 @@ __ai __attribute__((target("neon"))) uint32x2_t vmov_n_u32(uint32_t __p0) { __ai __attribute__((target("neon"))) uint32x2_t vmov_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -18958,7 +18979,7 @@ __ai __attribute__((target("neon"))) uint16x4_t vmov_n_u16(uint16_t __p0) { __ai __attribute__((target("neon"))) uint16x4_t vmov_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -18973,7 +18994,7 @@ __ai __attribute__((target("neon"))) int8x8_t vmov_n_s8(int8_t __p0) { __ai __attribute__((target("neon"))) int8x8_t vmov_n_s8(int8_t __p0) { int8x8_t 
__ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -18988,7 +19009,7 @@ __ai __attribute__((target("neon"))) float32x2_t vmov_n_f32(float32_t __p0) { __ai __attribute__((target("neon"))) float32x2_t vmov_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19005,7 +19026,7 @@ __ai __attribute__((target("neon"))) float32x2_t vmov_n_f32(float32_t __p0) { float16x4_t __ret; \ float16_t __s0 = __p0; \ __ret = (float16x4_t) {__s0, __s0, __s0, __s0}; \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -19020,7 +19041,7 @@ __ai __attribute__((target("neon"))) int32x2_t vmov_n_s32(int32_t __p0) { __ai __attribute__((target("neon"))) int32x2_t vmov_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19040,7 +19061,7 @@ __ai __attribute__((target("neon"))) int16x4_t vmov_n_s16(int16_t __p0) { __ai __attribute__((target("neon"))) int16x4_t vmov_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -19048,20 +19069,20 @@ __ai __attribute__((target("neon"))) int16x4_t vmov_n_s16(int16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vmovl_u8(uint8x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vmovl_u8(uint8x8_t __p0) { uint16x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vmovl_u8(uint8x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 49)); return __ret; } #endif @@ -19069,20 +19090,20 @@ __ai __attribute__((target("neon"))) uint16x8_t __noswap_vmovl_u8(uint8x8_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vmovl_u32(uint32x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vmovl_u32(uint32x2_t 
__p0) { uint64x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmovl_u32(uint32x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 51)); return __ret; } #endif @@ -19090,20 +19111,20 @@ __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmovl_u32(uint32x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vmovl_u16(uint16x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vmovl_u16(uint16x4_t __p0) { uint32x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmovl_u16(uint16x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 50)); return __ret; } #endif @@ -19111,20 +19132,20 @@ __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmovl_u16(uint16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vmovl_s8(int8x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vmovl_s8(int8x8_t __p0) { int16x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vmovl_s8(int8x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 33)); return __ret; } #endif @@ -19132,20 +19153,20 @@ __ai __attribute__((target("neon"))) int16x8_t __noswap_vmovl_s8(int8x8_t __p0) #ifdef 
__LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vmovl_s32(int32x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vmovl_s32(int32x2_t __p0) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vmovl_s32(int32x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 35)); return __ret; } #endif @@ -19153,20 +19174,20 @@ __ai __attribute__((target("neon"))) int64x2_t __noswap_vmovl_s32(int32x2_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vmovl_s16(int16x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vmovl_s16(int16x4_t __p0) { int32x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vmovl_v((int8x8_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vmovl_s16(int16x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmovl_v(__builtin_bit_cast(int8x8_t, __p0), 34)); return __ret; } #endif @@ -19174,20 +19195,20 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmovl_s16(int16x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vmovn_u32(uint32x4_t 
__p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 17)); return __ret; } #endif @@ -19195,20 +19216,20 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vmovn_u32(uint32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 18)); return __ret; } #endif @@ -19216,20 +19237,20 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vmovn_u64(uint64x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 16)); return __ret; } #endif @@ -19237,20 +19258,20 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vmovn_u16(uint16x8_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vmovn_s32(int32x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vmovn_s32(int32x4_t __p0) { int16x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, 
__ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vmovn_s32(int32x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 1)); return __ret; } #endif @@ -19258,20 +19279,20 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vmovn_s32(int32x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vmovn_s64(int64x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vmovn_s64(int64x2_t __p0) { int32x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vmovn_s64(int64x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 2)); return __ret; } #endif @@ -19279,20 +19300,20 @@ __ai __attribute__((target("neon"))) int32x2_t __noswap_vmovn_s64(int64x2_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vmovn_s16(int16x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vmovn_s16(int16x8_t __p0) { int8x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vmovn_v((int8x16_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vmovn_s16(int16x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vmovn_v(__builtin_bit_cast(int8x16_t, __p0), 0)); return __ret; } #endif @@ -19306,10 +19327,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vmulq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vmulq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -19323,10 +19344,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmulq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vmulq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -19340,10 +19361,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vmulq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vmulq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -19357,10 +19378,10 @@ __ai __attribute__((target("neon"))) int8x16_t vmulq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vmulq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -19374,10 +19395,10 @@ __ai __attribute__((target("neon"))) float32x4_t vmulq_f32(float32x4_t __p0, flo #else __ai __attribute__((target("neon"))) float32x4_t vmulq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -19391,10 +19412,10 @@ __ai __attribute__((target("neon"))) int32x4_t vmulq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vmulq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -19408,10 +19429,10 @@ __ai __attribute__((target("neon"))) int16x8_t vmulq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vmulq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -19425,10 +19446,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vmul_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vmul_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -19442,10 +19463,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vmul_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vmul_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19459,10 +19480,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vmul_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vmul_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + 
uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -19476,10 +19497,10 @@ __ai __attribute__((target("neon"))) int8x8_t vmul_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vmul_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -19493,10 +19514,10 @@ __ai __attribute__((target("neon"))) float32x2_t vmul_f32(float32x2_t __p0, floa #else __ai __attribute__((target("neon"))) float32x2_t vmul_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19510,10 +19531,10 @@ __ai __attribute__((target("neon"))) int32x2_t vmul_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vmul_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19527,10 +19548,10 @@ __ai __attribute__((target("neon"))) int16x4_t vmul_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vmul_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -19538,16 +19559,16 @@ __ai __attribute__((target("neon"))) int16x4_t vmul_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vmul_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vmul_v((int8x8_t)__p0, (int8x8_t)__p1, 4); + __ret = 
__builtin_bit_cast(poly8x8_t, __builtin_neon_vmul_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vmul_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vmul_v((int8x8_t)__rev0, (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vmul_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -19555,16 +19576,16 @@ __ai __attribute__((target("neon"))) poly8x8_t vmul_p8(poly8x8_t __p0, poly8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vmulq_v((int8x16_t)__p0, (int8x16_t)__p1, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vmulq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vmulq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vmulq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -19582,10 +19603,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x uint32x4_t __ret_95; \ uint32x4_t __s0_95 = __p0_95; \ uint32x2_t __s1_95 = __p1_95; \ - uint32x4_t __rev0_95; __rev0_95 = __builtin_shufflevector(__s0_95, __s0_95, 3, 2, 1, 0); \ - uint32x2_t __rev1_95; __rev1_95 = __builtin_shufflevector(__s1_95, __s1_95, 1, 0); \ + uint32x4_t __rev0_95; __rev0_95 = __builtin_shufflevector(__s0_95, __s0_95, __lane_reverse_128_32); \ + uint32x2_t __rev1_95; __rev1_95 = __builtin_shufflevector(__s1_95, __s1_95, __lane_reverse_64_32); \ __ret_95 = __rev0_95 * __noswap_splatq_lane_u32(__rev1_95, __p2_95); \ - __ret_95 = __builtin_shufflevector(__ret_95, __ret_95, 3, 2, 1, 0); \ + __ret_95 = __builtin_shufflevector(__ret_95, __ret_95, __lane_reverse_128_32); \ __ret_95; \ }) #endif @@ -19603,10 +19624,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x uint16x8_t __ret_97; \ uint16x8_t __s0_97 = __p0_97; \ uint16x4_t __s1_97 = __p1_97; \ - uint16x8_t __rev0_97; __rev0_97 = __builtin_shufflevector(__s0_97, 
__s0_97, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev1_97; __rev1_97 = __builtin_shufflevector(__s1_97, __s1_97, 3, 2, 1, 0); \ + uint16x8_t __rev0_97; __rev0_97 = __builtin_shufflevector(__s0_97, __s0_97, __lane_reverse_128_16); \ + uint16x4_t __rev1_97; __rev1_97 = __builtin_shufflevector(__s1_97, __s1_97, __lane_reverse_64_16); \ __ret_97 = __rev0_97 * __noswap_splatq_lane_u16(__rev1_97, __p2_97); \ - __ret_97 = __builtin_shufflevector(__ret_97, __ret_97, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_97 = __builtin_shufflevector(__ret_97, __ret_97, __lane_reverse_128_16); \ __ret_97; \ }) #endif @@ -19624,10 +19645,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x float32x4_t __ret_99; \ float32x4_t __s0_99 = __p0_99; \ float32x2_t __s1_99 = __p1_99; \ - float32x4_t __rev0_99; __rev0_99 = __builtin_shufflevector(__s0_99, __s0_99, 3, 2, 1, 0); \ - float32x2_t __rev1_99; __rev1_99 = __builtin_shufflevector(__s1_99, __s1_99, 1, 0); \ + float32x4_t __rev0_99; __rev0_99 = __builtin_shufflevector(__s0_99, __s0_99, __lane_reverse_128_32); \ + float32x2_t __rev1_99; __rev1_99 = __builtin_shufflevector(__s1_99, __s1_99, __lane_reverse_64_32); \ __ret_99 = __rev0_99 * __noswap_splatq_lane_f32(__rev1_99, __p2_99); \ - __ret_99 = __builtin_shufflevector(__ret_99, __ret_99, 3, 2, 1, 0); \ + __ret_99 = __builtin_shufflevector(__ret_99, __ret_99, __lane_reverse_128_32); \ __ret_99; \ }) #endif @@ -19645,10 +19666,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x int32x4_t __ret_101; \ int32x4_t __s0_101 = __p0_101; \ int32x2_t __s1_101 = __p1_101; \ - int32x4_t __rev0_101; __rev0_101 = __builtin_shufflevector(__s0_101, __s0_101, 3, 2, 1, 0); \ - int32x2_t __rev1_101; __rev1_101 = __builtin_shufflevector(__s1_101, __s1_101, 1, 0); \ + int32x4_t __rev0_101; __rev0_101 = __builtin_shufflevector(__s0_101, __s0_101, __lane_reverse_128_32); \ + int32x2_t __rev1_101; __rev1_101 = __builtin_shufflevector(__s1_101, __s1_101, __lane_reverse_64_32); \ __ret_101 = __rev0_101 * __noswap_splatq_lane_s32(__rev1_101, __p2_101); \ - __ret_101 = __builtin_shufflevector(__ret_101, __ret_101, 3, 2, 1, 0); \ + __ret_101 = __builtin_shufflevector(__ret_101, __ret_101, __lane_reverse_128_32); \ __ret_101; \ }) #endif @@ -19666,10 +19687,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x int16x8_t __ret_103; \ int16x8_t __s0_103 = __p0_103; \ int16x4_t __s1_103 = __p1_103; \ - int16x8_t __rev0_103; __rev0_103 = __builtin_shufflevector(__s0_103, __s0_103, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1_103; __rev1_103 = __builtin_shufflevector(__s1_103, __s1_103, 3, 2, 1, 0); \ + int16x8_t __rev0_103; __rev0_103 = __builtin_shufflevector(__s0_103, __s0_103, __lane_reverse_128_16); \ + int16x4_t __rev1_103; __rev1_103 = __builtin_shufflevector(__s1_103, __s1_103, __lane_reverse_64_16); \ __ret_103 = __rev0_103 * __noswap_splatq_lane_s16(__rev1_103, __p2_103); \ - __ret_103 = __builtin_shufflevector(__ret_103, __ret_103, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_103 = __builtin_shufflevector(__ret_103, __ret_103, __lane_reverse_128_16); \ __ret_103; \ }) #endif @@ -19687,10 +19708,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x uint32x2_t __ret_105; \ uint32x2_t __s0_105 = __p0_105; \ uint32x2_t __s1_105 = __p1_105; \ - uint32x2_t __rev0_105; __rev0_105 = __builtin_shufflevector(__s0_105, __s0_105, 1, 0); \ - uint32x2_t __rev1_105; __rev1_105 = __builtin_shufflevector(__s1_105, __s1_105, 1, 0); \ + 
uint32x2_t __rev0_105; __rev0_105 = __builtin_shufflevector(__s0_105, __s0_105, __lane_reverse_64_32); \ + uint32x2_t __rev1_105; __rev1_105 = __builtin_shufflevector(__s1_105, __s1_105, __lane_reverse_64_32); \ __ret_105 = __rev0_105 * __noswap_splat_lane_u32(__rev1_105, __p2_105); \ - __ret_105 = __builtin_shufflevector(__ret_105, __ret_105, 1, 0); \ + __ret_105 = __builtin_shufflevector(__ret_105, __ret_105, __lane_reverse_64_32); \ __ret_105; \ }) #endif @@ -19708,10 +19729,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x uint16x4_t __ret_107; \ uint16x4_t __s0_107 = __p0_107; \ uint16x4_t __s1_107 = __p1_107; \ - uint16x4_t __rev0_107; __rev0_107 = __builtin_shufflevector(__s0_107, __s0_107, 3, 2, 1, 0); \ - uint16x4_t __rev1_107; __rev1_107 = __builtin_shufflevector(__s1_107, __s1_107, 3, 2, 1, 0); \ + uint16x4_t __rev0_107; __rev0_107 = __builtin_shufflevector(__s0_107, __s0_107, __lane_reverse_64_16); \ + uint16x4_t __rev1_107; __rev1_107 = __builtin_shufflevector(__s1_107, __s1_107, __lane_reverse_64_16); \ __ret_107 = __rev0_107 * __noswap_splat_lane_u16(__rev1_107, __p2_107); \ - __ret_107 = __builtin_shufflevector(__ret_107, __ret_107, 3, 2, 1, 0); \ + __ret_107 = __builtin_shufflevector(__ret_107, __ret_107, __lane_reverse_64_16); \ __ret_107; \ }) #endif @@ -19729,10 +19750,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x float32x2_t __ret_109; \ float32x2_t __s0_109 = __p0_109; \ float32x2_t __s1_109 = __p1_109; \ - float32x2_t __rev0_109; __rev0_109 = __builtin_shufflevector(__s0_109, __s0_109, 1, 0); \ - float32x2_t __rev1_109; __rev1_109 = __builtin_shufflevector(__s1_109, __s1_109, 1, 0); \ + float32x2_t __rev0_109; __rev0_109 = __builtin_shufflevector(__s0_109, __s0_109, __lane_reverse_64_32); \ + float32x2_t __rev1_109; __rev1_109 = __builtin_shufflevector(__s1_109, __s1_109, __lane_reverse_64_32); \ __ret_109 = __rev0_109 * __noswap_splat_lane_f32(__rev1_109, __p2_109); \ - __ret_109 = __builtin_shufflevector(__ret_109, __ret_109, 1, 0); \ + __ret_109 = __builtin_shufflevector(__ret_109, __ret_109, __lane_reverse_64_32); \ __ret_109; \ }) #endif @@ -19750,10 +19771,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x int32x2_t __ret_111; \ int32x2_t __s0_111 = __p0_111; \ int32x2_t __s1_111 = __p1_111; \ - int32x2_t __rev0_111; __rev0_111 = __builtin_shufflevector(__s0_111, __s0_111, 1, 0); \ - int32x2_t __rev1_111; __rev1_111 = __builtin_shufflevector(__s1_111, __s1_111, 1, 0); \ + int32x2_t __rev0_111; __rev0_111 = __builtin_shufflevector(__s0_111, __s0_111, __lane_reverse_64_32); \ + int32x2_t __rev1_111; __rev1_111 = __builtin_shufflevector(__s1_111, __s1_111, __lane_reverse_64_32); \ __ret_111 = __rev0_111 * __noswap_splat_lane_s32(__rev1_111, __p2_111); \ - __ret_111 = __builtin_shufflevector(__ret_111, __ret_111, 1, 0); \ + __ret_111 = __builtin_shufflevector(__ret_111, __ret_111, __lane_reverse_64_32); \ __ret_111; \ }) #endif @@ -19771,10 +19792,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x int16x4_t __ret_113; \ int16x4_t __s0_113 = __p0_113; \ int16x4_t __s1_113 = __p1_113; \ - int16x4_t __rev0_113; __rev0_113 = __builtin_shufflevector(__s0_113, __s0_113, 3, 2, 1, 0); \ - int16x4_t __rev1_113; __rev1_113 = __builtin_shufflevector(__s1_113, __s1_113, 3, 2, 1, 0); \ + int16x4_t __rev0_113; __rev0_113 = __builtin_shufflevector(__s0_113, __s0_113, __lane_reverse_64_16); \ + int16x4_t __rev1_113; __rev1_113 = 
__builtin_shufflevector(__s1_113, __s1_113, __lane_reverse_64_16); \ __ret_113 = __rev0_113 * __noswap_splat_lane_s16(__rev1_113, __p2_113); \ - __ret_113 = __builtin_shufflevector(__ret_113, __ret_113, 3, 2, 1, 0); \ + __ret_113 = __builtin_shufflevector(__ret_113, __ret_113, __lane_reverse_64_16); \ __ret_113; \ }) #endif @@ -19788,9 +19809,9 @@ __ai __attribute__((target("neon"))) uint32x4_t vmulq_n_u32(uint32x4_t __p0, uin #else __ai __attribute__((target("neon"))) uint32x4_t vmulq_n_u32(uint32x4_t __p0, uint32_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __rev0 * (uint32x4_t) {__p1, __p1, __p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -19804,9 +19825,9 @@ __ai __attribute__((target("neon"))) uint16x8_t vmulq_n_u16(uint16x8_t __p0, uin #else __ai __attribute__((target("neon"))) uint16x8_t vmulq_n_u16(uint16x8_t __p0, uint16_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __rev0 * (uint16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -19820,9 +19841,9 @@ __ai __attribute__((target("neon"))) float32x4_t vmulq_n_f32(float32x4_t __p0, f #else __ai __attribute__((target("neon"))) float32x4_t vmulq_n_f32(float32x4_t __p0, float32_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __rev0 * (float32x4_t) {__p1, __p1, __p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -19836,9 +19857,9 @@ __ai __attribute__((target("neon"))) int32x4_t vmulq_n_s32(int32x4_t __p0, int32 #else __ai __attribute__((target("neon"))) int32x4_t vmulq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __rev0 * (int32x4_t) {__p1, __p1, __p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -19852,9 +19873,9 @@ __ai __attribute__((target("neon"))) int16x8_t vmulq_n_s16(int16x8_t __p0, int16 #else __ai __attribute__((target("neon"))) int16x8_t vmulq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __rev0 * (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -19868,9 +19889,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vmul_n_u32(uint32x2_t __p0, uint #else __ai 
__attribute__((target("neon"))) uint32x2_t vmul_n_u32(uint32x2_t __p0, uint32_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __rev0 * (uint32x2_t) {__p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19884,9 +19905,9 @@ __ai __attribute__((target("neon"))) uint16x4_t vmul_n_u16(uint16x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x4_t vmul_n_u16(uint16x4_t __p0, uint16_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __rev0 * (uint16x4_t) {__p1, __p1, __p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -19900,9 +19921,9 @@ __ai __attribute__((target("neon"))) float32x2_t vmul_n_f32(float32x2_t __p0, fl #else __ai __attribute__((target("neon"))) float32x2_t vmul_n_f32(float32x2_t __p0, float32_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __rev0 * (float32x2_t) {__p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19916,9 +19937,9 @@ __ai __attribute__((target("neon"))) int32x2_t vmul_n_s32(int32x2_t __p0, int32_ #else __ai __attribute__((target("neon"))) int32x2_t vmul_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __rev0 * (int32x2_t) {__p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -19932,9 +19953,9 @@ __ai __attribute__((target("neon"))) int16x4_t vmul_n_s16(int16x4_t __p0, int16_ #else __ai __attribute__((target("neon"))) int16x4_t vmul_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __rev0 * (int16x4_t) {__p1, __p1, __p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -19942,21 +19963,21 @@ __ai __attribute__((target("neon"))) int16x4_t vmul_n_s16(int16x4_t __p0, int16_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x8_t vmull_p8(poly8x8_t __p0, poly8x8_t __p1) { poly16x8_t __ret; - __ret = (poly16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 37); + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 37)); return __ret; } #else __ai __attribute__((target("neon"))) poly16x8_t vmull_p8(poly8x8_t __p0, poly8x8_t __p1) { poly16x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly16x8_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 37); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 37)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t __noswap_vmull_p8(poly8x8_t __p0, poly8x8_t __p1) { poly16x8_t __ret; - __ret = (poly16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 37); + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 37)); return __ret; } #endif @@ -19964,21 +19985,21 @@ __ai __attribute__((target("neon"))) poly16x8_t __noswap_vmull_p8(poly8x8_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vmull_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vmull_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vmull_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 49)); return __ret; } #endif @@ -19986,21 +20007,21 @@ __ai __attribute__((target("neon"))) uint16x8_t __noswap_vmull_u8(uint8x8_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vmull_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vmull_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 51); - __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmull_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 51)); return __ret; } #endif @@ -20008,21 +20029,21 @@ __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmull_u32(uint32x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vmull_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vmull_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmull_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 50)); return __ret; } #endif @@ -20030,21 +20051,21 @@ __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmull_u16(uint16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vmull_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vmull_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vmull_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 33)); return __ret; } #endif @@ -20052,21 +20073,21 @@ __ai __attribute__((target("neon"))) int16x8_t __noswap_vmull_s8(int8x8_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 35)); return __ret; } #endif @@ -20074,21 +20095,21 @@ __ai __attribute__((target("neon"))) int64x2_t __noswap_vmull_s32(int32x2_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 34)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 34)); return __ret; } #endif @@ -20106,10 +20127,10 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmull_s16(int16x4_t __p0 uint64x2_t __ret_115; \ uint32x2_t __s0_115 = __p0_115; \ uint32x2_t __s1_115 = __p1_115; \ - uint32x2_t __rev0_115; __rev0_115 = __builtin_shufflevector(__s0_115, __s0_115, 1, 0); \ - uint32x2_t __rev1_115; __rev1_115 = __builtin_shufflevector(__s1_115, __s1_115, 1, 0); \ + uint32x2_t __rev0_115; __rev0_115 = __builtin_shufflevector(__s0_115, __s0_115, __lane_reverse_64_32); \ + uint32x2_t __rev1_115; __rev1_115 = __builtin_shufflevector(__s1_115, __s1_115, __lane_reverse_64_32); \ __ret_115 = __noswap_vmull_u32(__rev0_115, __noswap_splat_lane_u32(__rev1_115, __p2_115)); \ - __ret_115 = __builtin_shufflevector(__ret_115, __ret_115, 1, 0); \ + __ret_115 = __builtin_shufflevector(__ret_115, __ret_115, __lane_reverse_128_64); \ __ret_115; \ }) #endif @@ -20127,10 +20148,10 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmull_s16(int16x4_t __p0 uint32x4_t __ret_117; \ uint16x4_t __s0_117 = __p0_117; \ uint16x4_t __s1_117 = __p1_117; \ - uint16x4_t __rev0_117; __rev0_117 = __builtin_shufflevector(__s0_117, __s0_117, 3, 2, 1, 0); \ - uint16x4_t __rev1_117; __rev1_117 = __builtin_shufflevector(__s1_117, __s1_117, 3, 2, 1, 0); \ + uint16x4_t __rev0_117; __rev0_117 = __builtin_shufflevector(__s0_117, __s0_117, __lane_reverse_64_16); \ + uint16x4_t __rev1_117; __rev1_117 = __builtin_shufflevector(__s1_117, __s1_117, __lane_reverse_64_16); \ __ret_117 = __noswap_vmull_u16(__rev0_117, __noswap_splat_lane_u16(__rev1_117, __p2_117)); \ - __ret_117 = __builtin_shufflevector(__ret_117, __ret_117, 3, 2, 1, 0); \ + __ret_117 = __builtin_shufflevector(__ret_117, __ret_117, __lane_reverse_128_32); \ __ret_117; \ }) #endif @@ -20148,10 +20169,10 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmull_s16(int16x4_t __p0 int64x2_t __ret_119; \ int32x2_t __s0_119 = __p0_119; \ int32x2_t __s1_119 = __p1_119; \ - int32x2_t __rev0_119; __rev0_119 = __builtin_shufflevector(__s0_119, __s0_119, 1, 0); \ - int32x2_t __rev1_119; __rev1_119 = __builtin_shufflevector(__s1_119, __s1_119, 1, 0); \ + int32x2_t __rev0_119; __rev0_119 = __builtin_shufflevector(__s0_119, __s0_119, __lane_reverse_64_32); \ + int32x2_t __rev1_119; __rev1_119 = __builtin_shufflevector(__s1_119, __s1_119, __lane_reverse_64_32); \ __ret_119 = __noswap_vmull_s32(__rev0_119, __noswap_splat_lane_s32(__rev1_119, __p2_119)); \ - __ret_119 = __builtin_shufflevector(__ret_119, __ret_119, 1, 0); \ + __ret_119 = __builtin_shufflevector(__ret_119, __ret_119, __lane_reverse_128_64); \ __ret_119; \ }) #endif @@ -20169,10 +20190,10 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmull_s16(int16x4_t __p0 int32x4_t __ret_121; \ int16x4_t __s0_121 = __p0_121; \ int16x4_t __s1_121 = __p1_121; \ - int16x4_t __rev0_121; __rev0_121 = __builtin_shufflevector(__s0_121, __s0_121, 3, 2, 1, 0); \ - int16x4_t __rev1_121; __rev1_121 = __builtin_shufflevector(__s1_121, __s1_121, 3, 2, 1, 0); \ + int16x4_t __rev0_121; __rev0_121 = __builtin_shufflevector(__s0_121, __s0_121, __lane_reverse_64_16); \ + 
int16x4_t __rev1_121; __rev1_121 = __builtin_shufflevector(__s1_121, __s1_121, __lane_reverse_64_16); \ __ret_121 = __noswap_vmull_s16(__rev0_121, __noswap_splat_lane_s16(__rev1_121, __p2_121)); \ - __ret_121 = __builtin_shufflevector(__ret_121, __ret_121, 3, 2, 1, 0); \ + __ret_121 = __builtin_shufflevector(__ret_121, __ret_121, __lane_reverse_128_32); \ __ret_121; \ }) #endif @@ -20186,9 +20207,9 @@ __ai __attribute__((target("neon"))) uint64x2_t vmull_n_u32(uint32x2_t __p0, uin #else __ai __attribute__((target("neon"))) uint64x2_t vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint64x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __noswap_vmull_u32(__rev0, (uint32x2_t) {__p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { @@ -20207,9 +20228,9 @@ __ai __attribute__((target("neon"))) uint32x4_t vmull_n_u16(uint16x4_t __p0, uin #else __ai __attribute__((target("neon"))) uint32x4_t vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint32x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __noswap_vmull_u16(__rev0, (uint16x4_t) {__p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { @@ -20228,9 +20249,9 @@ __ai __attribute__((target("neon"))) int64x2_t vmull_n_s32(int32x2_t __p0, int32 #else __ai __attribute__((target("neon"))) int64x2_t vmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __noswap_vmull_s32(__rev0, (int32x2_t) {__p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vmull_n_s32(int32x2_t __p0, int32_t __p1) { @@ -20249,9 +20270,9 @@ __ai __attribute__((target("neon"))) int32x4_t vmull_n_s16(int16x4_t __p0, int16 #else __ai __attribute__((target("neon"))) int32x4_t vmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __noswap_vmull_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vmull_n_s16(int16x4_t __p0, int16_t __p1) { @@ -20270,9 +20291,9 @@ __ai __attribute__((target("neon"))) poly8x8_t vmvn_p8(poly8x8_t __p0) { #else __ai __attribute__((target("neon"))) poly8x8_t vmvn_p8(poly8x8_t __p0) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); 
__ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -20286,9 +20307,9 @@ __ai __attribute__((target("neon"))) poly8x16_t vmvnq_p8(poly8x16_t __p0) { #else __ai __attribute__((target("neon"))) poly8x16_t vmvnq_p8(poly8x16_t __p0) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20302,9 +20323,9 @@ __ai __attribute__((target("neon"))) uint8x16_t vmvnq_u8(uint8x16_t __p0) { #else __ai __attribute__((target("neon"))) uint8x16_t vmvnq_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20318,9 +20339,9 @@ __ai __attribute__((target("neon"))) uint32x4_t vmvnq_u32(uint32x4_t __p0) { #else __ai __attribute__((target("neon"))) uint32x4_t vmvnq_u32(uint32x4_t __p0) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20334,9 +20355,9 @@ __ai __attribute__((target("neon"))) uint16x8_t vmvnq_u16(uint16x8_t __p0) { #else __ai __attribute__((target("neon"))) uint16x8_t vmvnq_u16(uint16x8_t __p0) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -20350,9 +20371,9 @@ __ai __attribute__((target("neon"))) int8x16_t vmvnq_s8(int8x16_t __p0) { #else __ai __attribute__((target("neon"))) int8x16_t vmvnq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20366,9 +20387,9 @@ __ai __attribute__((target("neon"))) int32x4_t vmvnq_s32(int32x4_t __p0) { #else __ai __attribute__((target("neon"))) int32x4_t vmvnq_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = ~__rev0; - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20382,9 +20403,9 @@ __ai __attribute__((target("neon"))) int16x8_t vmvnq_s16(int16x8_t __p0) { #else __ai __attribute__((target("neon"))) int16x8_t vmvnq_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -20398,9 +20419,9 @@ __ai __attribute__((target("neon"))) uint8x8_t vmvn_u8(uint8x8_t __p0) { #else __ai __attribute__((target("neon"))) uint8x8_t vmvn_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -20414,9 +20435,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vmvn_u32(uint32x2_t __p0) { #else __ai __attribute__((target("neon"))) uint32x2_t vmvn_u32(uint32x2_t __p0) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -20430,9 +20451,9 @@ __ai __attribute__((target("neon"))) uint16x4_t vmvn_u16(uint16x4_t __p0) { #else __ai __attribute__((target("neon"))) uint16x4_t vmvn_u16(uint16x4_t __p0) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -20446,9 +20467,9 @@ __ai __attribute__((target("neon"))) int8x8_t vmvn_s8(int8x8_t __p0) { #else __ai __attribute__((target("neon"))) int8x8_t vmvn_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -20462,9 +20483,9 @@ __ai __attribute__((target("neon"))) int32x2_t vmvn_s32(int32x2_t __p0) { #else __ai __attribute__((target("neon"))) int32x2_t vmvn_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -20478,9 +20499,9 @@ __ai __attribute__((target("neon"))) int16x4_t vmvn_s16(int16x4_t __p0) { #else __ai __attribute__((target("neon"))) int16x4_t 
vmvn_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = ~__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -20494,9 +20515,9 @@ __ai __attribute__((target("neon"))) int8x16_t vnegq_s8(int8x16_t __p0) { #else __ai __attribute__((target("neon"))) int8x16_t vnegq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20510,9 +20531,9 @@ __ai __attribute__((target("neon"))) float32x4_t vnegq_f32(float32x4_t __p0) { #else __ai __attribute__((target("neon"))) float32x4_t vnegq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20526,9 +20547,9 @@ __ai __attribute__((target("neon"))) int32x4_t vnegq_s32(int32x4_t __p0) { #else __ai __attribute__((target("neon"))) int32x4_t vnegq_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20542,9 +20563,9 @@ __ai __attribute__((target("neon"))) int16x8_t vnegq_s16(int16x8_t __p0) { #else __ai __attribute__((target("neon"))) int16x8_t vnegq_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -20558,9 +20579,9 @@ __ai __attribute__((target("neon"))) int8x8_t vneg_s8(int8x8_t __p0) { #else __ai __attribute__((target("neon"))) int8x8_t vneg_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -20574,9 +20595,9 @@ __ai __attribute__((target("neon"))) float32x2_t vneg_f32(float32x2_t __p0) { #else __ai __attribute__((target("neon"))) float32x2_t vneg_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_64_32); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -20590,9 +20611,9 @@ __ai __attribute__((target("neon"))) int32x2_t vneg_s32(int32x2_t __p0) { #else __ai __attribute__((target("neon"))) int32x2_t vneg_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -20606,9 +20627,9 @@ __ai __attribute__((target("neon"))) int16x4_t vneg_s16(int16x4_t __p0) { #else __ai __attribute__((target("neon"))) int16x4_t vneg_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -20622,10 +20643,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vornq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vornq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20639,10 +20660,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vornq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vornq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20656,10 +20677,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vornq_u64(uint64x2_t __p0, uint6 #else __ai __attribute__((target("neon"))) uint64x2_t vornq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_64); return __ret; } #endif @@ -20673,10 +20694,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vornq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vornq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -20690,10 +20711,10 @@ __ai __attribute__((target("neon"))) int8x16_t vornq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vornq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20707,10 +20728,10 @@ __ai __attribute__((target("neon"))) int32x4_t vornq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vornq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20724,10 +20745,10 @@ __ai __attribute__((target("neon"))) int64x2_t vornq_s64(int64x2_t __p0, int64x2 #else __ai __attribute__((target("neon"))) int64x2_t vornq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -20741,10 +20762,10 @@ __ai __attribute__((target("neon"))) int16x8_t vornq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vornq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + 
int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -20758,10 +20779,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vorn_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vorn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -20775,10 +20796,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vorn_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vorn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -20797,10 +20818,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vorn_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vorn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -20814,10 +20835,10 @@ __ai __attribute__((target("neon"))) int8x8_t vorn_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vorn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -20831,10 +20852,10 @@ __ai __attribute__((target("neon"))) int32x2_t vorn_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vorn_s32(int32x2_t __p0, int32x2_t 
__p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -20853,10 +20874,10 @@ __ai __attribute__((target("neon"))) int16x4_t vorn_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vorn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 | ~__rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -20870,10 +20891,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vorrq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vorrq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20887,10 +20908,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vorrq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vorrq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20904,10 +20925,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vorrq_u64(uint64x2_t __p0, uint6 #else __ai __attribute__((target("neon"))) uint64x2_t vorrq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_64); return __ret; } #endif @@ -20921,10 +20942,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vorrq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vorrq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -20938,10 +20959,10 @@ __ai __attribute__((target("neon"))) int8x16_t vorrq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vorrq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -20955,10 +20976,10 @@ __ai __attribute__((target("neon"))) int32x4_t vorrq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vorrq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -20972,10 +20993,10 @@ __ai __attribute__((target("neon"))) int64x2_t vorrq_s64(int64x2_t __p0, int64x2 #else __ai __attribute__((target("neon"))) int64x2_t vorrq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -20989,10 +21010,10 @@ __ai __attribute__((target("neon"))) int16x8_t vorrq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vorrq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -21006,10 +21027,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vorr_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vorr_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21023,10 +21044,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vorr_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vorr_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21045,10 +21066,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vorr_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vorr_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21062,10 +21083,10 @@ __ai __attribute__((target("neon"))) int8x8_t vorr_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vorr_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21079,10 +21100,10 @@ __ai __attribute__((target("neon"))) int32x2_t vorr_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vorr_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t 
__ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21101,10 +21122,10 @@ __ai __attribute__((target("neon"))) int16x4_t vorr_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vorr_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 | __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21112,16 +21133,16 @@ __ai __attribute__((target("neon"))) int16x4_t vorr_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vpadalq_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vpadalq_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -21129,16 +21150,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vpadalq_u8(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vpadalq_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vpadalq_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 
0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -21146,16 +21167,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vpadalq_u32(uint64x2_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vpadalq_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vpadalq_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -21163,16 +21184,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vpadalq_u16(uint32x4_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vpadalq_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vpadalq_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -21180,16 +21201,16 @@ __ai __attribute__((target("neon"))) int16x8_t vpadalq_s8(int16x8_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vpadalq_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = 
__builtin_bit_cast(int64x2_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vpadalq_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -21197,16 +21218,16 @@ __ai __attribute__((target("neon"))) int64x2_t vpadalq_s32(int64x2_t __p0, int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vpadalq_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vpadalq_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpadalq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -21214,16 +21235,16 @@ __ai __attribute__((target("neon"))) int32x4_t vpadalq_s16(int32x4_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vpadal_u8(uint16x4_t __p0, uint8x8_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vpadal_u8(uint16x4_t __p0, uint8x8_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __rev0), 
__builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21231,14 +21252,14 @@ __ai __attribute__((target("neon"))) uint16x4_t vpadal_u8(uint16x4_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x1_t vpadal_u32(uint64x1_t __p0, uint32x2_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x1_t vpadal_u32(uint64x1_t __p0, uint32x2_t __p1) { uint64x1_t __ret; - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__rev1, 19); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __rev1), 19)); return __ret; } #endif @@ -21246,16 +21267,16 @@ __ai __attribute__((target("neon"))) uint64x1_t vpadal_u32(uint64x1_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vpadal_u16(uint32x2_t __p0, uint16x4_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vpadal_u16(uint32x2_t __p0, uint16x4_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21263,16 +21284,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vpadal_u16(uint32x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vpadal_s8(int16x4_t __p0, int8x8_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vpadal_s8(int16x4_t __p0, int8x8_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_64_8); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21280,14 +21301,14 @@ __ai __attribute__((target("neon"))) int16x4_t vpadal_s8(int16x4_t __p0, int8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x1_t vpadal_s32(int64x1_t __p0, int32x2_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 3)); return __ret; } #else __ai __attribute__((target("neon"))) int64x1_t vpadal_s32(int64x1_t __p0, int32x2_t __p1) { int64x1_t __ret; - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__rev1, 3); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __rev1), 3)); return __ret; } #endif @@ -21295,16 +21316,16 @@ __ai __attribute__((target("neon"))) int64x1_t vpadal_s32(int64x1_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vpadal_s16(int32x2_t __p0, int16x4_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vpadal_s16(int32x2_t __p0, int16x4_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x2_t) __builtin_neon_vpadal_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpadal_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21312,16 +21333,16 @@ __ai __attribute__((target("neon"))) int32x2_t vpadal_s16(int32x2_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vpadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vpadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21329,16 +21350,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vpadd_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vpadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vpadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21346,16 +21367,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vpadd_u32(uint32x2_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vpadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vpadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21363,16 +21384,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vpadd_u16(uint16x4_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vpadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t 
vpadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21380,16 +21401,16 @@ __ai __attribute__((target("neon"))) int8x8_t vpadd_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vpadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vpadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21397,16 +21418,16 @@ __ai __attribute__((target("neon"))) float32x2_t vpadd_f32(float32x2_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vpadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vpadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21414,16 +21435,16 @@ __ai __attribute__((target("neon"))) int32x2_t vpadd_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) int16x4_t vpadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vpadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vpadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21431,15 +21452,15 @@ __ai __attribute__((target("neon"))) int16x4_t vpadd_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vpaddlq_u8(uint8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vpaddlq_u8(uint8x16_t __p0) { uint16x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -21447,15 +21468,15 @@ __ai __attribute__((target("neon"))) uint16x8_t vpaddlq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vpaddlq_u32(uint32x4_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vpaddlq_u32(uint32x4_t __p0) { uint64x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -21463,15 +21484,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vpaddlq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vpaddlq_u16(uint16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) 
__builtin_neon_vpaddlq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vpaddlq_u16(uint16x8_t __p0) { uint32x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -21479,15 +21500,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vpaddlq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vpaddlq_s8(int8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vpaddlq_s8(int8x16_t __p0) { int16x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -21495,15 +21516,15 @@ __ai __attribute__((target("neon"))) int16x8_t vpaddlq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vpaddlq_s32(int32x4_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vpaddlq_s32(int32x4_t __p0) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -21511,15 +21532,15 @@ __ai __attribute__((target("neon"))) int64x2_t vpaddlq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vpaddlq_s16(int16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vpaddlq_s16(int16x8_t __p0) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__rev0, 34); 
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpaddlq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -21527,15 +21548,15 @@ __ai __attribute__((target("neon"))) int32x4_t vpaddlq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vpaddl_u8(uint8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vpaddl_u8(uint8x8_t __p0) { uint16x4_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21543,14 +21564,14 @@ __ai __attribute__((target("neon"))) uint16x4_t vpaddl_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x1_t vpaddl_u32(uint32x2_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x1_t vpaddl_u32(uint32x2_t __p0) { uint64x1_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 19); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __rev0), 19)); return __ret; } #endif @@ -21558,15 +21579,15 @@ __ai __attribute__((target("neon"))) uint64x1_t vpaddl_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vpaddl_u16(uint16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vpaddl_u16(uint16x4_t __p0) { uint32x2_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21574,15 +21595,15 @@ __ai __attribute__((target("neon"))) uint32x2_t vpaddl_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vpaddl_s8(int8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 
1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vpaddl_s8(int8x8_t __p0) { int16x4_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21590,14 +21611,14 @@ __ai __attribute__((target("neon"))) int16x4_t vpaddl_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x1_t vpaddl_s32(int32x2_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } #else __ai __attribute__((target("neon"))) int64x1_t vpaddl_s32(int32x2_t __p0) { int64x1_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 3); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __rev0), 3)); return __ret; } #endif @@ -21605,15 +21626,15 @@ __ai __attribute__((target("neon"))) int64x1_t vpaddl_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vpaddl_s16(int16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vpaddl_s16(int16x4_t __p0) { int32x2_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpaddl_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21621,16 +21642,16 @@ __ai __attribute__((target("neon"))) int32x2_t vpaddl_s16(int16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vpmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vpmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21638,16 +21659,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vpmax_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vpmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vpmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21655,16 +21676,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vpmax_u32(uint32x2_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vpmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vpmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21672,16 +21693,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vpmax_u16(uint16x4_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vpmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t 
vpmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21689,16 +21710,16 @@ __ai __attribute__((target("neon"))) int8x8_t vpmax_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vpmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vpmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21706,16 +21727,16 @@ __ai __attribute__((target("neon"))) float32x2_t vpmax_f32(float32x2_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vpmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vpmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21723,16 +21744,16 @@ __ai __attribute__((target("neon"))) int32x2_t vpmax_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) int16x4_t vpmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vpmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vpmax_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpmax_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21740,16 +21761,16 @@ __ai __attribute__((target("neon"))) int16x4_t vpmax_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vpmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vpmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21757,16 +21778,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vpmin_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vpmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vpmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
__p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21774,16 +21795,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vpmin_u32(uint32x2_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vpmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vpmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21791,16 +21812,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vpmin_u16(uint16x4_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vpmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vpmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21808,16 +21829,16 @@ __ai __attribute__((target("neon"))) int8x8_t vpmin_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vpmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vpmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21825,16 +21846,16 @@ __ai __attribute__((target("neon"))) float32x2_t vpmin_f32(float32x2_t __p0, flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vpmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vpmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21842,16 +21863,16 @@ __ai __attribute__((target("neon"))) int32x2_t vpmin_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vpmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vpmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vpmin_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vpmin_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21859,15 +21880,15 @@ __ai __attribute__((target("neon"))) int16x4_t vpmin_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqabsq_s8(int8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) 
__builtin_neon_vqabsq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqabsq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -21875,15 +21896,15 @@ __ai __attribute__((target("neon"))) int8x16_t vqabsq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqabsq_s32(int32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqabsq_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -21891,15 +21912,15 @@ __ai __attribute__((target("neon"))) int32x4_t vqabsq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqabsq_s16(int16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqabsq_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -21907,15 +21928,15 @@ __ai __attribute__((target("neon"))) int16x8_t vqabsq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqabs_s8(int8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqabs_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqabs_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqabs_v((int8x8_t)__rev0, 0); - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqabs_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -21923,15 +21944,15 @@ __ai __attribute__((target("neon"))) int8x8_t vqabs_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqabs_s32(int32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqabs_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqabs_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqabs_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqabs_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -21939,15 +21960,15 @@ __ai __attribute__((target("neon"))) int32x2_t vqabs_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqabs_s16(int16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqabs_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqabs_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqabs_v((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqabs_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -21955,16 +21976,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqabs_s16(int16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = 
__builtin_bit_cast(uint8x16_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -21972,16 +21993,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vqaddq_u8(uint8x16_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -21989,16 +22010,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vqaddq_u32(uint32x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -22006,16 +22027,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vqaddq_u64(uint64x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -22023,16 +22044,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vqaddq_u16(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -22040,16 +22061,16 @@ __ai __attribute__((target("neon"))) int8x16_t vqaddq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -22057,16 +22078,16 @@ __ai 
__attribute__((target("neon"))) int32x4_t vqaddq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -22074,16 +22095,16 @@ __ai __attribute__((target("neon"))) int64x2_t vqaddq_s64(int64x2_t __p0, int64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -22091,16 +22112,16 @@ __ai __attribute__((target("neon"))) int16x8_t vqaddq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 
5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -22108,38 +22129,38 @@ __ai __attribute__((target("neon"))) uint8x8_t vqadd_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -22147,16 +22168,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vqadd_u16(uint16x4_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - 
__ret = (int8x8_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -22164,38 +22185,38 @@ __ai __attribute__((target("neon"))) int8x8_t vqadd_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vqadd_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -22203,22 +22224,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqadd_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqdmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmlal_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqdmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmlal_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmlal_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 35)); return __ret; } #endif @@ -22226,22 +22247,22 @@ __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmlal_s32(int64x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmlal_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmlal_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmlal_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 34)); return __ret; } #endif @@ -22261,11 +22282,11 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlal_s16(int32x4_t __ int64x2_t __s0_123 = __p0_123; \ int32x2_t __s1_123 = __p1_123; \ int32x2_t __s2_123 = __p2_123; \ - int64x2_t __rev0_123; __rev0_123 = __builtin_shufflevector(__s0_123, __s0_123, 1, 0); \ - int32x2_t __rev1_123; __rev1_123 = __builtin_shufflevector(__s1_123, __s1_123, 1, 0); \ - int32x2_t __rev2_123; __rev2_123 = __builtin_shufflevector(__s2_123, __s2_123, 1, 0); \ + int64x2_t __rev0_123; __rev0_123 = __builtin_shufflevector(__s0_123, __s0_123, __lane_reverse_128_64); \ + int32x2_t __rev1_123; __rev1_123 = __builtin_shufflevector(__s1_123, __s1_123, __lane_reverse_64_32); \ + int32x2_t __rev2_123; __rev2_123 = __builtin_shufflevector(__s2_123, __s2_123, __lane_reverse_64_32); \ __ret_123 = __noswap_vqdmlal_s32(__rev0_123, __rev1_123, __noswap_splat_lane_s32(__rev2_123, __p3_123)); \ - __ret_123 = __builtin_shufflevector(__ret_123, __ret_123, 1, 0); \ + __ret_123 = __builtin_shufflevector(__ret_123, __ret_123, __lane_reverse_128_64); \ __ret_123; \ }) #endif @@ -22285,11 +22306,11 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlal_s16(int32x4_t __ int32x4_t __s0_125 = __p0_125; \ int16x4_t __s1_125 = __p1_125; \ int16x4_t __s2_125 = __p2_125; \ - int32x4_t __rev0_125; __rev0_125 = __builtin_shufflevector(__s0_125, __s0_125, 3, 2, 1, 0); \ - int16x4_t __rev1_125; __rev1_125 = __builtin_shufflevector(__s1_125, __s1_125, 3, 2, 1, 0); \ - int16x4_t __rev2_125; __rev2_125 = __builtin_shufflevector(__s2_125, __s2_125, 3, 2, 1, 0); \ + int32x4_t __rev0_125; __rev0_125 = __builtin_shufflevector(__s0_125, __s0_125, __lane_reverse_128_32); \ + int16x4_t __rev1_125; __rev1_125 = __builtin_shufflevector(__s1_125, __s1_125, __lane_reverse_64_16); \ + int16x4_t __rev2_125; __rev2_125 = __builtin_shufflevector(__s2_125, __s2_125, __lane_reverse_64_16); \ __ret_125 = __noswap_vqdmlal_s16(__rev0_125, __rev1_125, __noswap_splat_lane_s16(__rev2_125, __p3_125)); \ - __ret_125 = __builtin_shufflevector(__ret_125, __ret_125, 3, 2, 1, 0); \ + __ret_125 = __builtin_shufflevector(__ret_125, __ret_125, __lane_reverse_128_32); \ __ret_125; \ }) #endif @@ -22303,10 +22324,10 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmlal_n_s32(int64x2_t __p0, int #else __ai __attribute__((target("neon"))) int64x2_t vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vqdmlal_s32(__rev0, __rev1, (int32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { @@ -22325,10 +22346,10 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_n_s16(int32x4_t __p0, int #else __ai __attribute__((target("neon"))) int32x4_t vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __noswap_vqdmlal_s16(__rev0, __rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { @@ -22341,22 +22362,22 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlal_n_s16(int32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqdmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmlsl_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqdmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmlsl_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmlsl_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 35)); return __ret; } #endif @@ -22364,22 +22385,22 @@ __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmlsl_s32(int64x2_t __ 
#ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmlsl_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmlsl_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmlsl_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 34)); return __ret; } #endif @@ -22399,11 +22420,11 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlsl_s16(int32x4_t __ int64x2_t __s0_127 = __p0_127; \ int32x2_t __s1_127 = __p1_127; \ int32x2_t __s2_127 = __p2_127; \ - int64x2_t __rev0_127; __rev0_127 = __builtin_shufflevector(__s0_127, __s0_127, 1, 0); \ - int32x2_t __rev1_127; __rev1_127 = __builtin_shufflevector(__s1_127, __s1_127, 1, 0); \ - int32x2_t __rev2_127; __rev2_127 = __builtin_shufflevector(__s2_127, __s2_127, 1, 0); \ + int64x2_t __rev0_127; __rev0_127 = __builtin_shufflevector(__s0_127, __s0_127, __lane_reverse_128_64); \ + int32x2_t __rev1_127; __rev1_127 = __builtin_shufflevector(__s1_127, __s1_127, __lane_reverse_64_32); \ + int32x2_t __rev2_127; __rev2_127 = __builtin_shufflevector(__s2_127, __s2_127, __lane_reverse_64_32); \ __ret_127 = __noswap_vqdmlsl_s32(__rev0_127, __rev1_127, __noswap_splat_lane_s32(__rev2_127, __p3_127)); \ - __ret_127 = __builtin_shufflevector(__ret_127, __ret_127, 1, 0); \ + __ret_127 = __builtin_shufflevector(__ret_127, __ret_127, __lane_reverse_128_64); \ __ret_127; \ }) #endif @@ -22423,11 +22444,11 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlsl_s16(int32x4_t __ int32x4_t __s0_129 = __p0_129; \ int16x4_t __s1_129 = __p1_129; \ int16x4_t __s2_129 = __p2_129; \ - int32x4_t __rev0_129; __rev0_129 = __builtin_shufflevector(__s0_129, __s0_129, 3, 2, 1, 0); \ - int16x4_t __rev1_129; __rev1_129 = __builtin_shufflevector(__s1_129, __s1_129, 3, 2, 1, 0); \ - int16x4_t __rev2_129; __rev2_129 = __builtin_shufflevector(__s2_129, __s2_129, 3, 2, 1, 0); \ + int32x4_t 
__rev0_129; __rev0_129 = __builtin_shufflevector(__s0_129, __s0_129, __lane_reverse_128_32); \ + int16x4_t __rev1_129; __rev1_129 = __builtin_shufflevector(__s1_129, __s1_129, __lane_reverse_64_16); \ + int16x4_t __rev2_129; __rev2_129 = __builtin_shufflevector(__s2_129, __s2_129, __lane_reverse_64_16); \ __ret_129 = __noswap_vqdmlsl_s16(__rev0_129, __rev1_129, __noswap_splat_lane_s16(__rev2_129, __p3_129)); \ - __ret_129 = __builtin_shufflevector(__ret_129, __ret_129, 3, 2, 1, 0); \ + __ret_129 = __builtin_shufflevector(__ret_129, __ret_129, __lane_reverse_128_32); \ __ret_129; \ }) #endif @@ -22441,10 +22462,10 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmlsl_n_s32(int64x2_t __p0, int #else __ai __attribute__((target("neon"))) int64x2_t vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vqdmlsl_s32(__rev0, __rev1, (int32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { @@ -22463,10 +22484,10 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_n_s16(int32x4_t __p0, int #else __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __noswap_vqdmlsl_s16(__rev0, __rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { @@ -22479,21 +22500,21 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmlsl_n_s16(int32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, 
__builtin_neon_vqdmulhq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #endif @@ -22501,21 +22522,21 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmulhq_s32(int32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqdmulhq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vqdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #endif @@ -22523,21 +22544,21 @@ __ai __attribute__((target("neon"))) int16x8_t __noswap_vqdmulhq_s16(int16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqdmulh_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vqdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #endif @@ -22545,21 +22566,21 @@ __ai __attribute__((target("neon"))) int32x2_t __noswap_vqdmulh_s32(int32x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqdmulh_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #endif @@ -22573,9 +22594,9 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmulhq_n_s32(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vqdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __noswap_vqdmulhq_s32(__rev0, (int32x4_t) {__p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -22589,9 +22610,9 @@ __ai __attribute__((target("neon"))) int16x8_t vqdmulhq_n_s16(int16x8_t __p0, in #else __ai __attribute__((target("neon"))) int16x8_t vqdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __noswap_vqdmulhq_s16(__rev0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -22605,9 +22626,9 @@ __ai __attribute__((target("neon"))) int32x2_t vqdmulh_n_s32(int32x2_t __p0, int #else __ai 
__attribute__((target("neon"))) int32x2_t vqdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __noswap_vqdmulh_s32(__rev0, (int32x2_t) {__p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -22621,9 +22642,9 @@ __ai __attribute__((target("neon"))) int16x4_t vqdmulh_n_s16(int16x4_t __p0, int #else __ai __attribute__((target("neon"))) int16x4_t vqdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __noswap_vqdmulh_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -22631,21 +22652,21 @@ __ai __attribute__((target("neon"))) int16x4_t vqdmulh_n_s16(int16x4_t __p0, int #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqdmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqdmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqdmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 35)); return __ret; } #endif @@ -22653,21 +22674,21 @@ __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmull_s32(int32x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) 
__builtin_neon_vqdmull_v((int8x8_t)__rev0, (int8x8_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmull_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmull_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 34)); return __ret; } #endif @@ -22685,10 +22706,10 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmull_s16(int16x4_t __ int64x2_t __ret_131; \ int32x2_t __s0_131 = __p0_131; \ int32x2_t __s1_131 = __p1_131; \ - int32x2_t __rev0_131; __rev0_131 = __builtin_shufflevector(__s0_131, __s0_131, 1, 0); \ - int32x2_t __rev1_131; __rev1_131 = __builtin_shufflevector(__s1_131, __s1_131, 1, 0); \ + int32x2_t __rev0_131; __rev0_131 = __builtin_shufflevector(__s0_131, __s0_131, __lane_reverse_64_32); \ + int32x2_t __rev1_131; __rev1_131 = __builtin_shufflevector(__s1_131, __s1_131, __lane_reverse_64_32); \ __ret_131 = __noswap_vqdmull_s32(__rev0_131, __noswap_splat_lane_s32(__rev1_131, __p2_131)); \ - __ret_131 = __builtin_shufflevector(__ret_131, __ret_131, 1, 0); \ + __ret_131 = __builtin_shufflevector(__ret_131, __ret_131, __lane_reverse_128_64); \ __ret_131; \ }) #endif @@ -22706,10 +22727,10 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmull_s16(int16x4_t __ int32x4_t __ret_133; \ int16x4_t __s0_133 = __p0_133; \ int16x4_t __s1_133 = __p1_133; \ - int16x4_t __rev0_133; __rev0_133 = __builtin_shufflevector(__s0_133, __s0_133, 3, 2, 1, 0); \ - int16x4_t __rev1_133; __rev1_133 = __builtin_shufflevector(__s1_133, __s1_133, 3, 2, 1, 0); \ + int16x4_t __rev0_133; __rev0_133 = __builtin_shufflevector(__s0_133, __s0_133, __lane_reverse_64_16); \ + int16x4_t __rev1_133; __rev1_133 = __builtin_shufflevector(__s1_133, __s1_133, __lane_reverse_64_16); \ __ret_133 = __noswap_vqdmull_s16(__rev0_133, __noswap_splat_lane_s16(__rev1_133, __p2_133)); \ - __ret_133 = __builtin_shufflevector(__ret_133, __ret_133, 3, 2, 1, 0); \ + __ret_133 = __builtin_shufflevector(__ret_133, __ret_133, __lane_reverse_128_32); \ __ret_133; \ }) #endif @@ -22723,9 +22744,9 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmull_n_s32(int32x2_t __p0, int #else __ai __attribute__((target("neon"))) int64x2_t vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __noswap_vqdmull_s32(__rev0, (int32x2_t) {__p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { @@ -22744,9 +22765,9 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmull_n_s16(int16x4_t __p0, int #else __ai __attribute__((target("neon"))) int32x4_t vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t 
__ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __noswap_vqdmull_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { @@ -22759,20 +22780,20 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqdmull_n_s16(int16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vqmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vqmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vqmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 17)); return __ret; } #endif @@ -22780,20 +22801,20 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vqmovn_u32(uint32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vqmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vqmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vqmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 18)); return __ret; } #endif @@ -22801,20 +22822,20 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vqmovn_u64(uint64x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 16)); return __ret; 
} #else __ai __attribute__((target("neon"))) uint8x8_t vqmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vqmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 16)); return __ret; } #endif @@ -22822,20 +22843,20 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vqmovn_u16(uint16x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqmovn_s32(int32x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqmovn_s32(int32x4_t __p0) { int16x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vqmovn_s32(int32x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 1)); return __ret; } #endif @@ -22843,20 +22864,20 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vqmovn_s32(int32x4_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqmovn_s64(int64x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqmovn_s64(int64x2_t __p0) { int32x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vqmovn_s64(int64x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 2)); return __ret; } #endif @@ -22864,20 +22885,20 @@ __ai 
__attribute__((target("neon"))) int32x2_t __noswap_vqmovn_s64(int64x2_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqmovn_s16(int16x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqmovn_s16(int16x8_t __p0) { int8x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vqmovn_s16(int16x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqmovn_v(__builtin_bit_cast(int8x16_t, __p0), 0)); return __ret; } #endif @@ -22885,20 +22906,20 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vqmovn_s16(int16x8_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vqmovun_s32(int32x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vqmovun_s32(int32x4_t __p0) { uint16x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vqmovun_v((int8x16_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vqmovun_s32(int32x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __p0), 17)); return __ret; } #endif @@ -22906,20 +22927,20 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vqmovun_s32(int32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vqmovun_s64(int64x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vqmovun_s64(int64x2_t __p0) { uint32x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vqmovun_v((int8x16_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __rev0), 18)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vqmovun_s64(int64x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __p0), 18)); return __ret; } #endif @@ -22927,20 +22948,20 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vqmovun_s64(int64x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqmovun_s16(int16x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __p0), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqmovun_s16(int16x8_t __p0) { uint8x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqmovun_v((int8x16_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __rev0), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vqmovun_s16(int16x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqmovun_v(__builtin_bit_cast(int8x16_t, __p0), 16)); return __ret; } #endif @@ -22948,15 +22969,15 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vqmovun_s16(int16x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqnegq_s8(int8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqnegq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -22964,15 +22985,15 @@ __ai __attribute__((target("neon"))) int8x16_t vqnegq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqnegq_s32(int32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqnegq_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -22980,15 +23001,15 @@ __ai __attribute__((target("neon"))) int32x4_t vqnegq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqnegq_s16(int16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqnegq_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -22996,15 +23017,15 @@ __ai __attribute__((target("neon"))) int16x8_t vqnegq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqneg_s8(int8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqneg_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqneg_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqneg_v((int8x8_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqneg_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -23012,15 +23033,15 @@ __ai __attribute__((target("neon"))) int8x8_t vqneg_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqneg_s32(int32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqneg_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqneg_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqneg_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqneg_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -23028,15 +23049,15 @@ __ai __attribute__((target("neon"))) int32x2_t vqneg_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqneg_s16(int16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t) 
__builtin_neon_vqneg_v((int8x8_t)__p0, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqneg_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqneg_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqneg_v((int8x8_t)__rev0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqneg_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -23044,21 +23065,21 @@ __ai __attribute__((target("neon"))) int16x4_t vqneg_s16(int16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqrdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqrdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmulhq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vqrdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #endif @@ -23066,21 +23087,21 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vqrdmulhq_s32(int32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqrdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqrdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmulhq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vqrdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmulhq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #endif @@ -23088,21 +23109,21 @@ __ai __attribute__((target("neon"))) int16x8_t __noswap_vqrdmulhq_s16(int16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqrdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqrdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmulh_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vqrdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #endif @@ -23110,21 +23131,21 @@ __ai __attribute__((target("neon"))) int32x2_t __noswap_vqrdmulh_s32(int32x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmulh_v(__builtin_bit_cast(int8x8_t, __rev0), 
__builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmulh_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #endif @@ -23138,9 +23159,9 @@ __ai __attribute__((target("neon"))) int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, i #else __ai __attribute__((target("neon"))) int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __noswap_vqrdmulhq_s32(__rev0, (int32x4_t) {__p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -23154,9 +23175,9 @@ __ai __attribute__((target("neon"))) int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, i #else __ai __attribute__((target("neon"))) int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __noswap_vqrdmulhq_s16(__rev0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -23170,9 +23191,9 @@ __ai __attribute__((target("neon"))) int32x2_t vqrdmulh_n_s32(int32x2_t __p0, in #else __ai __attribute__((target("neon"))) int32x2_t vqrdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __noswap_vqrdmulh_s32(__rev0, (int32x2_t) {__p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -23186,9 +23207,9 @@ __ai __attribute__((target("neon"))) int16x4_t vqrdmulh_n_s16(int16x4_t __p0, in #else __ai __attribute__((target("neon"))) int16x4_t vqrdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __noswap_vqrdmulh_s16(__rev0, (int16x4_t) {__p1, __p1, __p1, __p1}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -23196,16 +23217,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqrdmulh_n_s16(int16x4_t __p0, in #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) 
uint8x16_t vqrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -23213,16 +23234,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vqrshlq_u8(uint8x16_t __p0, int8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vqrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vqrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -23230,16 +23251,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vqrshlq_u32(uint32x4_t __p0, int #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vqrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vqrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, 
__ret, __lane_reverse_128_64); return __ret; } #endif @@ -23247,16 +23268,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vqrshlq_u64(uint64x2_t __p0, int #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -23264,16 +23285,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vqrshlq_u16(uint16x8_t __p0, int #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -23281,16 +23302,16 @@ __ai __attribute__((target("neon"))) int8x16_t vqrshlq_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, 
__p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -23298,16 +23319,16 @@ __ai __attribute__((target("neon"))) int32x4_t vqrshlq_s32(int32x4_t __p0, int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -23315,16 +23336,16 @@ __ai __attribute__((target("neon"))) int64x2_t vqrshlq_s64(int64x2_t __p0, int64 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -23332,16 +23353,16 @@ __ai __attribute__((target("neon"))) int16x8_t vqrshlq_s16(int16x8_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqrshl_u8(uint8x8_t 
__p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -23349,38 +23370,38 @@ __ai __attribute__((target("neon"))) uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); 
- __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -23388,16 +23409,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vqrshl_u16(uint16x4_t __p0, int1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -23405,38 +23426,38 @@ __ai __attribute__((target("neon"))) int8x8_t vqrshl_s8(int8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, 
int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -23445,22 +23466,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vqrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #endif @@ -23469,22 +23490,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vqrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vqrshrn_n_u64(__p0, 
__p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -23493,22 +23514,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vqrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -23517,22 +23538,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vqrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #endif @@ -23541,22 +23562,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t 
__rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vqrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #endif @@ -23565,22 +23586,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vqrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vqrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #endif @@ -23589,22 +23610,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vqrshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) 
#endif @@ -23613,22 +23634,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vqrshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -23637,22 +23658,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vqrshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqrshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqrshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -23660,16 +23681,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqrshl_s16(int16x4_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -23677,16 +23698,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -23694,16 +23715,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vqshlq_u32(uint32x4_t __p0, int3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -23711,16 +23732,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vqshlq_u64(uint64x2_t __p0, int6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) 
{ uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -23728,16 +23749,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vqshlq_u16(uint16x8_t __p0, int1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -23745,16 +23766,16 @@ __ai __attribute__((target("neon"))) int8x16_t vqshlq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -23762,16 +23783,16 @@ __ai __attribute__((target("neon"))) int32x4_t vqshlq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -23779,16 +23800,16 @@ __ai __attribute__((target("neon"))) int64x2_t vqshlq_s64(int64x2_t __p0, int64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -23796,16 +23817,16 @@ __ai __attribute__((target("neon"))) int16x8_t vqshlq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai 
__attribute__((target("neon"))) uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -23813,38 +23834,38 @@ __ai __attribute__((target("neon"))) uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, 
__builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -23852,16 +23873,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vqshl_u16(uint16x4_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -23869,38 +23890,38 @@ __ai __attribute__((target("neon"))) int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai 
__attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -23909,16 +23930,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #else #define vqshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -23927,16 +23948,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define vqshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -23945,16 +23966,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define vqshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -23963,16 +23984,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vqshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -23981,16 +24002,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #else #define vqshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -23999,16 +24020,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define vqshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, 
__builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -24017,16 +24038,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define vqshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -24035,16 +24056,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define vqshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vqshlq_n_v((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -24053,16 +24074,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vqshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -24071,16 +24092,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) 
__builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vqshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -24088,23 +24109,23 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vqshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -24113,16 +24134,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vqshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -24131,16 +24152,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, 
__builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vqshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -24148,23 +24169,23 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vqshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vqshl_n_v((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -24173,16 +24194,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshluq_n_s8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #else #define vqshluq_n_s8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -24191,16 +24212,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshluq_n_s32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, 
__s0), __p1, 50)); \ __ret; \ }) #else #define vqshluq_n_s32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -24209,16 +24230,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshluq_n_s64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define vqshluq_n_s64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -24227,16 +24248,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshluq_n_s16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vqshluq_n_s16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vqshluq_n_v((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqshluq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -24245,16 +24266,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlu_n_s8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshlu_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vqshlu_n_s8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; 
__rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshlu_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -24263,16 +24284,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlu_n_s32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshlu_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vqshlu_n_s32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshlu_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -24280,23 +24301,23 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshlu_n_s64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vqshlu_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vqshlu_n_s16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshlu_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vqshlu_n_s16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vqshlu_n_v((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshlu_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -24305,22 +24326,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint16x4_t, 
__builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vqshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #endif @@ -24329,22 +24350,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vqshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -24353,22 +24374,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vqshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -24377,22 +24398,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, 
__builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vqshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #endif @@ -24401,22 +24422,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vqshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #endif @@ -24425,22 +24446,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vqshrn_n_v((int8x16_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vqshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) 
__builtin_neon_vqshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #endif @@ -24449,22 +24470,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vqshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vqshrun_n_s32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #endif @@ -24473,22 +24494,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vqshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vqshrun_n_s64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -24497,22 +24518,22 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #define vqshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vqshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) 
__builtin_neon_vqshrun_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vqshrun_n_s16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vqshrun_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqshrun_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -24520,16 +24541,16 @@ __ai __attribute__((target("neon"))) int16x4_t vqshl_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -24537,16 +24558,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } 
#endif @@ -24554,16 +24575,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vqsubq_u32(uint32x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -24571,16 +24592,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vqsubq_u64(uint64x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -24588,16 +24609,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vqsubq_u16(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - 
__ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -24605,16 +24626,16 @@ __ai __attribute__((target("neon"))) int8x16_t vqsubq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -24622,16 +24643,16 @@ __ai __attribute__((target("neon"))) int32x4_t vqsubq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -24639,16 +24660,16 @@ __ai __attribute__((target("neon"))) int64x2_t vqsubq_s64(int64x2_t __p0, int64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, 
(int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqsubq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -24656,16 +24677,16 @@ __ai __attribute__((target("neon"))) int16x8_t vqsubq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -24673,38 +24694,38 @@ __ai __attribute__((target("neon"))) uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, 
__builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -24712,16 +24733,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vqsub_u16(uint16x4_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -24729,38 +24750,38 @@ __ai __attribute__((target("neon"))) int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); 
return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqsub_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -24768,21 +24789,21 @@ __ai __attribute__((target("neon"))) int16x4_t vqsub_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint16x4_t, 
__builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #endif @@ -24790,21 +24811,21 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vraddhn_u32(uint32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #endif @@ -24812,21 +24833,21 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vraddhn_u64(uint64x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 
16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #endif @@ -24834,21 +24855,21 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vraddhn_u16(uint16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vraddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #endif @@ -24856,21 +24877,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vraddhn_s32(int32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vraddhn_s64(int64x2_t __p0, 
int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #endif @@ -24878,21 +24899,21 @@ __ai __attribute__((target("neon"))) int32x2_t __noswap_vraddhn_s64(int64x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vraddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vraddhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #endif @@ -24900,15 +24921,15 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vraddhn_s16(int16x8_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vrecpeq_u32(uint32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrecpeq_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vrecpeq_u32(uint32x4_t __p0) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrecpeq_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -24916,15 +24937,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vrecpeq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrecpeq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrecpeq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrecpeq_f32(float32x4_t __p0) { 
float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrecpeq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -24932,15 +24953,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrecpeq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vrecpe_u32(uint32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrecpe_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vrecpe_u32(uint32x2_t __p0) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrecpe_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -24948,15 +24969,15 @@ __ai __attribute__((target("neon"))) uint32x2_t vrecpe_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrecpe_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrecpe_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrecpe_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrecpe_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -24964,16 +24985,16 @@ __ai __attribute__((target("neon"))) float32x2_t vrecpe_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrecpsq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrecpsq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -24981,16 +25002,16 @@ __ai __attribute__((target("neon"))) float32x4_t vrecpsq_f32(float32x4_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrecps_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrecps_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -25004,9 +25025,9 @@ __ai __attribute__((target("neon"))) poly8x8_t vrev16_p8(poly8x8_t __p0) { #else __ai __attribute__((target("neon"))) poly8x8_t vrev16_p8(poly8x8_t __p0) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25020,9 +25041,9 @@ __ai __attribute__((target("neon"))) poly8x16_t vrev16q_p8(poly8x16_t __p0) { #else __ai __attribute__((target("neon"))) poly8x16_t vrev16q_p8(poly8x16_t __p0) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25036,9 +25057,9 @@ __ai __attribute__((target("neon"))) uint8x16_t vrev16q_u8(uint8x16_t __p0) { #else __ai __attribute__((target("neon"))) uint8x16_t vrev16q_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 
12, 15, 14); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25052,9 +25073,9 @@ __ai __attribute__((target("neon"))) int8x16_t vrev16q_s8(int8x16_t __p0) { #else __ai __attribute__((target("neon"))) int8x16_t vrev16q_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25068,9 +25089,9 @@ __ai __attribute__((target("neon"))) uint8x8_t vrev16_u8(uint8x8_t __p0) { #else __ai __attribute__((target("neon"))) uint8x8_t vrev16_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25084,9 +25105,9 @@ __ai __attribute__((target("neon"))) int8x8_t vrev16_s8(int8x8_t __p0) { #else __ai __attribute__((target("neon"))) int8x8_t vrev16_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25100,9 +25121,9 @@ __ai __attribute__((target("neon"))) poly8x8_t vrev32_p8(poly8x8_t __p0) { #else __ai __attribute__((target("neon"))) poly8x8_t vrev32_p8(poly8x8_t __p0) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25116,9 +25137,9 @@ __ai __attribute__((target("neon"))) poly16x4_t vrev32_p16(poly16x4_t __p0) { #else __ai __attribute__((target("neon"))) poly16x4_t vrev32_p16(poly16x4_t __p0) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25132,9 +25153,9 @@ __ai __attribute__((target("neon"))) poly8x16_t vrev32q_p8(poly8x16_t __p0) { #else __ai __attribute__((target("neon"))) poly8x16_t vrev32q_p8(poly8x16_t __p0) { poly8x16_t __ret; - poly8x16_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25148,9 +25169,9 @@ __ai __attribute__((target("neon"))) poly16x8_t vrev32q_p16(poly16x8_t __p0) { #else __ai __attribute__((target("neon"))) poly16x8_t vrev32q_p16(poly16x8_t __p0) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25164,9 +25185,9 @@ __ai __attribute__((target("neon"))) uint8x16_t vrev32q_u8(uint8x16_t __p0) { #else __ai __attribute__((target("neon"))) uint8x16_t vrev32q_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25180,9 +25201,9 @@ __ai __attribute__((target("neon"))) uint16x8_t vrev32q_u16(uint16x8_t __p0) { #else __ai __attribute__((target("neon"))) uint16x8_t vrev32q_u16(uint16x8_t __p0) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25196,9 +25217,9 @@ __ai __attribute__((target("neon"))) int8x16_t vrev32q_s8(int8x16_t __p0) { #else __ai __attribute__((target("neon"))) int8x16_t vrev32q_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25212,9 +25233,9 @@ __ai __attribute__((target("neon"))) int16x8_t vrev32q_s16(int16x8_t __p0) { #else __ai __attribute__((target("neon"))) int16x8_t vrev32q_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); 
__ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25228,9 +25249,9 @@ __ai __attribute__((target("neon"))) uint8x8_t vrev32_u8(uint8x8_t __p0) { #else __ai __attribute__((target("neon"))) uint8x8_t vrev32_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25244,9 +25265,9 @@ __ai __attribute__((target("neon"))) uint16x4_t vrev32_u16(uint16x4_t __p0) { #else __ai __attribute__((target("neon"))) uint16x4_t vrev32_u16(uint16x4_t __p0) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25260,9 +25281,9 @@ __ai __attribute__((target("neon"))) int8x8_t vrev32_s8(int8x8_t __p0) { #else __ai __attribute__((target("neon"))) int8x8_t vrev32_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25276,9 +25297,9 @@ __ai __attribute__((target("neon"))) int16x4_t vrev32_s16(int16x4_t __p0) { #else __ai __attribute__((target("neon"))) int16x4_t vrev32_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25292,9 +25313,9 @@ __ai __attribute__((target("neon"))) poly8x8_t vrev64_p8(poly8x8_t __p0) { #else __ai __attribute__((target("neon"))) poly8x8_t vrev64_p8(poly8x8_t __p0) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25308,9 +25329,9 @@ __ai __attribute__((target("neon"))) poly16x4_t vrev64_p16(poly16x4_t __p0) { #else __ai __attribute__((target("neon"))) poly16x4_t vrev64_p16(poly16x4_t __p0) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25324,9 +25345,9 @@ __ai __attribute__((target("neon"))) poly8x16_t vrev64q_p8(poly8x16_t __p0) { #else __ai __attribute__((target("neon"))) poly8x16_t vrev64q_p8(poly8x16_t __p0) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25340,9 +25361,9 @@ __ai __attribute__((target("neon"))) poly16x8_t vrev64q_p16(poly16x8_t __p0) { #else __ai __attribute__((target("neon"))) poly16x8_t vrev64q_p16(poly16x8_t __p0) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25356,9 +25377,9 @@ __ai __attribute__((target("neon"))) uint8x16_t vrev64q_u8(uint8x16_t __p0) { #else __ai __attribute__((target("neon"))) uint8x16_t vrev64q_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25372,9 +25393,9 @@ __ai __attribute__((target("neon"))) uint32x4_t vrev64q_u32(uint32x4_t __p0) { #else __ai __attribute__((target("neon"))) uint32x4_t vrev64q_u32(uint32x4_t __p0) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -25388,9 +25409,9 @@ __ai __attribute__((target("neon"))) uint16x8_t vrev64q_u16(uint16x8_t __p0) { #else __ai __attribute__((target("neon"))) uint16x8_t vrev64q_u16(uint16x8_t __p0) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ 
-25404,9 +25425,9 @@ __ai __attribute__((target("neon"))) int8x16_t vrev64q_s8(int8x16_t __p0) { #else __ai __attribute__((target("neon"))) int8x16_t vrev64q_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25420,9 +25441,9 @@ __ai __attribute__((target("neon"))) float32x4_t vrev64q_f32(float32x4_t __p0) { #else __ai __attribute__((target("neon"))) float32x4_t vrev64q_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -25436,9 +25457,9 @@ __ai __attribute__((target("neon"))) int32x4_t vrev64q_s32(int32x4_t __p0) { #else __ai __attribute__((target("neon"))) int32x4_t vrev64q_s32(int32x4_t __p0) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -25452,9 +25473,9 @@ __ai __attribute__((target("neon"))) int16x8_t vrev64q_s16(int16x8_t __p0) { #else __ai __attribute__((target("neon"))) int16x8_t vrev64q_s16(int16x8_t __p0) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25468,9 +25489,9 @@ __ai __attribute__((target("neon"))) uint8x8_t vrev64_u8(uint8x8_t __p0) { #else __ai __attribute__((target("neon"))) uint8x8_t vrev64_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25484,9 +25505,9 @@ __ai __attribute__((target("neon"))) uint32x2_t vrev64_u32(uint32x2_t __p0) { #else __ai __attribute__((target("neon"))) uint32x2_t vrev64_u32(uint32x2_t __p0) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); - __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -25500,9 +25521,9 @@ __ai __attribute__((target("neon"))) uint16x4_t vrev64_u16(uint16x4_t __p0) { #else __ai __attribute__((target("neon"))) uint16x4_t vrev64_u16(uint16x4_t __p0) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25516,9 +25537,9 @@ __ai __attribute__((target("neon"))) int8x8_t vrev64_s8(int8x8_t __p0) { #else __ai __attribute__((target("neon"))) int8x8_t vrev64_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25532,9 +25553,9 @@ __ai __attribute__((target("neon"))) float32x2_t vrev64_f32(float32x2_t __p0) { #else __ai __attribute__((target("neon"))) float32x2_t vrev64_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -25548,9 +25569,9 @@ __ai __attribute__((target("neon"))) int32x2_t vrev64_s32(int32x2_t __p0) { #else __ai __attribute__((target("neon"))) int32x2_t vrev64_s32(int32x2_t __p0) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -25564,9 +25585,9 @@ __ai __attribute__((target("neon"))) int16x4_t vrev64_s16(int16x4_t __p0) { #else __ai __attribute__((target("neon"))) int16x4_t vrev64_s16(int16x4_t __p0) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25580,9 +25601,9 @@ __ai __attribute__((target("neon"))) float16x8_t vrev64q_f16(float16x8_t __p0) { #else __ai __attribute__((target("neon"))) float16x8_t vrev64q_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25596,9 +25617,9 @@ __ai __attribute__((target("neon"))) float16x4_t vrev64_f16(float16x4_t __p0) { #else __ai __attribute__((target("neon"))) float16x4_t vrev64_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25606,16 +25627,16 @@ __ai __attribute__((target("neon"))) float16x4_t vrev64_f16(float16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25623,16 +25644,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); 
return __ret; } #endif @@ -25640,16 +25661,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vrhaddq_u32(uint32x4_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25657,16 +25678,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vrhaddq_u16(uint16x8_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25674,16 +25695,16 @@ __ai __attribute__((target("neon"))) int8x16_t vrhaddq_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - 
int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -25691,16 +25712,16 @@ __ai __attribute__((target("neon"))) int32x4_t vrhaddq_s32(int32x4_t __p0, int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrhaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25708,16 +25729,16 @@ __ai __attribute__((target("neon"))) int16x8_t vrhaddq_s16(int16x8_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25725,16 +25746,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t 
vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -25742,16 +25763,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vrhadd_u32(uint32x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25759,16 +25780,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vrhadd_u16(uint16x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25776,16 +25797,16 @@ __ai __attribute__((target("neon"))) int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -25793,16 +25814,16 @@ __ai __attribute__((target("neon"))) int32x2_t vrhadd_s32(int32x2_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrhadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -25810,16 +25831,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrhadd_s16(int16x4_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 
11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25827,16 +25848,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -25844,16 +25865,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vrshlq_u32(uint32x4_t __p0, int3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -25861,16 +25882,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vrshlq_u64(uint64x2_t __p0, int6 #ifdef 
__LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25878,16 +25899,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vrshlq_u16(uint16x8_t __p0, int1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -25895,16 +25916,16 @@ __ai __attribute__((target("neon"))) int8x16_t vrshlq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 
34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -25912,16 +25933,16 @@ __ai __attribute__((target("neon"))) int32x4_t vrshlq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -25929,16 +25950,16 @@ __ai __attribute__((target("neon"))) int64x2_t vrshlq_s64(int64x2_t __p0, int64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -25946,16 +25967,16 @@ __ai __attribute__((target("neon"))) int16x8_t vrshlq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, 
__builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -25963,38 +25984,38 @@ __ai __attribute__((target("neon"))) uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -26002,16 +26023,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vrshl_u16(uint16x4_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -26019,38 +26040,38 @@ __ai __attribute__((target("neon"))) int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, 
__builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -26059,16 +26080,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #else #define vrshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -26077,16 +26098,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define vrshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -26095,16 +26116,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define 
vrshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -26113,16 +26134,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vrshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -26131,16 +26152,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #else #define vrshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -26149,16 +26170,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define vrshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; 
__rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -26167,16 +26188,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define vrshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -26185,16 +26206,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define vrshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vrshrq_n_v((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -26203,16 +26224,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vrshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -26221,16 +26242,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshr_n_u32(__p0, 
__p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vrshr_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -26238,23 +26259,23 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshr_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vrshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vrshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -26263,16 +26284,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vrshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -26281,16 +26302,16 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, 
__p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vrshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -26298,23 +26319,23 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshr_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vrshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vrshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vrshr_n_v((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -26323,22 +26344,22 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vrshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #endif @@ -26347,22 +26368,22 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, 
int16x4 #define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vrshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -26371,22 +26392,22 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vrshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -26395,22 +26416,22 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, 
__rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vrshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #endif @@ -26419,22 +26440,22 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vrshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #endif @@ -26443,22 +26464,22 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vrshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vrshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #endif @@ -26466,15 +26487,15 @@ __ai __attribute__((target("neon"))) int16x4_t vrshl_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrsqrteq_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai 
__attribute__((target("neon"))) uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrsqrteq_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -26482,15 +26503,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vrsqrteq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrsqrteq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrsqrteq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrsqrteq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrsqrteq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -26498,15 +26519,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrsqrteq_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vrsqrte_u32(uint32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrsqrte_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vrsqrte_u32(uint32x2_t __p0) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrsqrte_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -26514,15 +26535,15 @@ __ai __attribute__((target("neon"))) uint32x2_t vrsqrte_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrsqrte_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrsqrte_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrsqrte_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, 
__builtin_neon_vrsqrte_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -26530,16 +26551,16 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrte_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrsqrtsq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrsqrtsq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -26547,16 +26568,16 @@ __ai __attribute__((target("neon"))) float32x4_t vrsqrtsq_f32(float32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrsqrts_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrsqrts_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -26566,7 +26587,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -26574,10 +26595,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, 
__s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -26587,7 +26608,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 50)); \ __ret; \ }) #else @@ -26595,10 +26616,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -26608,7 +26629,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else @@ -26616,10 +26637,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_64); \ __ret; \ }) #endif @@ -26629,7 +26650,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -26637,10 +26658,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -26650,7 +26671,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -26658,10 +26679,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -26671,7 +26692,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else @@ -26679,10 +26700,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int32x4_t __ret; \ int32x4_t __s0 = __p0; \ 
int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -26692,7 +26713,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 35)); \ __ret; \ }) #else @@ -26700,10 +26721,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -26713,7 +26734,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -26721,10 +26742,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vrsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vrsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -26734,7 
+26755,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 16)); \ __ret; \ }) #else @@ -26742,10 +26763,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -26755,7 +26776,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 18)); \ __ret; \ }) #else @@ -26763,10 +26784,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -26775,7 +26796,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -26783,7 +26804,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, 
__s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 17)); \ __ret; \ }) #else @@ -26791,10 +26812,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -26804,7 +26825,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 0)); \ __ret; \ }) #else @@ -26812,10 +26833,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -26825,7 +26846,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -26833,10 +26854,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, 
__builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -26845,7 +26866,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -26853,7 +26874,7 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -26861,10 +26882,10 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vrsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -26872,21 +26893,21 @@ __ai __attribute__((target("neon"))) float32x2_t vrsqrts_f32(float32x2_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, 
(int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #endif @@ -26894,21 +26915,21 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vrsubhn_u32(uint32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t __noswap_vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #endif @@ -26916,21 +26937,21 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vrsubhn_u64(uint64x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, 
__builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #endif @@ -26938,21 +26959,21 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vrsubhn_u16(uint16x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #endif @@ -26960,21 +26981,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vrsubhn_s32(int32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #endif @@ -26982,21 +27003,21 @@ __ai 
__attribute__((target("neon"))) int32x2_t __noswap_vrsubhn_s64(int64x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #endif @@ -27006,7 +27027,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -27014,16 +27035,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vset_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __ret; \ poly8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - __ret = (poly8x8_t) __builtin_neon_vset_lane_i8(__s0, (poly8x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vset_lane_i8(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -27033,7 +27054,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x4_t __ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -27041,16 +27062,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x4_t 
__ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vset_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __ret; \ poly16_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - __ret = (poly16x4_t) __builtin_neon_vset_lane_i16(__s0, (poly16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vset_lane_i16(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -27060,7 +27081,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -27068,16 +27089,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_vsetq_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __ret; \ poly8_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (poly8x16_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -27087,7 +27108,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x8_t __ret; \ poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -27095,16 +27116,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p poly16x8_t __ret; \ poly16_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vsetq_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __ret; \ poly16_t __s0 = __p0; \ 
poly16x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (poly16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -27114,7 +27135,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint8x16_t __ret; \ uint8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \ __ret; \ }) #else @@ -27122,16 +27143,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint8x16_t __ret; \ uint8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_vsetq_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __ret; \ uint8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27141,7 +27162,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint32x4_t __ret; \ uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsetq_lane_i32(__s0, __builtin_bit_cast(int32x4_t, __s1), __p2)); \ __ret; \ }) #else @@ -27149,16 +27170,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint32x4_t __ret; \ uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsetq_lane_i32(__s0, __builtin_bit_cast(int32x4_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_vsetq_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __ret; \ uint32_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsetq_lane_i32(__s0, __builtin_bit_cast(int32x4_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27168,7 +27189,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint64x2_t __ret; \ uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ + __ret = 
__builtin_bit_cast(uint64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \ __ret; \ }) #else @@ -27176,16 +27197,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint64x2_t __ret; \ uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vsetq_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27195,7 +27216,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint16x8_t __ret; \ uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \ __ret; \ }) #else @@ -27203,16 +27224,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint16x8_t __ret; \ uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vsetq_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __ret; \ uint16_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27222,7 +27243,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int8x16_t __ret; \ int8_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \ __ret; \ }) #else @@ -27230,16 +27251,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int8x16_t __ret; \ int8_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 
8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #define __noswap_vsetq_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __ret; \ int8_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vsetq_lane_i8(__s0, (int8x16_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsetq_lane_i8(__s0, __builtin_bit_cast(int8x16_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27249,7 +27270,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p float32x4_t __ret; \ float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vsetq_lane_f32(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -27257,16 +27278,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p float32x4_t __ret; \ float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vsetq_lane_f32(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_vsetq_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __ret; \ float32_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ - __ret = (float32x4_t) __builtin_neon_vsetq_lane_f32(__s0, (float32x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vsetq_lane_f32(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -27276,7 +27297,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int32x4_t __ret; \ int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsetq_lane_i32(__s0, __builtin_bit_cast(int32x4_t, __s1), __p2)); \ __ret; \ }) #else @@ -27284,16 +27305,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int32x4_t __ret; \ int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsetq_lane_i32(__s0, __builtin_bit_cast(int32x4_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_vsetq_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __ret; \ int32_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vsetq_lane_i32(__s0, (int32x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsetq_lane_i32(__s0, __builtin_bit_cast(int32x4_t, __s1), __p2)); \ __ret; \ }) 
#endif @@ -27303,7 +27324,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int64x2_t __ret; \ int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \ __ret; \ }) #else @@ -27311,16 +27332,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int64x2_t __ret; \ int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vsetq_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (int64x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __builtin_bit_cast(int64x2_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27330,7 +27351,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int16x8_t __ret; \ int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \ __ret; \ }) #else @@ -27338,16 +27359,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int16x8_t __ret; \ int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vsetq_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __ret; \ int16_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vsetq_lane_i16(__s0, (int16x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsetq_lane_i16(__s0, __builtin_bit_cast(int16x8_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27357,7 +27378,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint8x8_t __ret; \ uint8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \ __ret; \ }) #else @@ -27365,16 +27386,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint8x8_t __ret; \ uint8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev1; __rev1 = 
__builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vset_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __ret; \ uint8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27384,7 +27405,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint32x2_t __ret; \ uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vset_lane_i32(__s0, __builtin_bit_cast(int32x2_t, __s1), __p2)); \ __ret; \ }) #else @@ -27392,16 +27413,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint32x2_t __ret; \ uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vset_lane_i32(__s0, __builtin_bit_cast(int32x2_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vset_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __ret; \ uint32_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vset_lane_i32(__s0, __builtin_bit_cast(int32x2_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27410,7 +27431,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint64x1_t __ret; \ uint64_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vset_lane_i64(__s0, (int64x1_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vset_lane_i64(__s0, __builtin_bit_cast(int64x1_t, __s1), __p2)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -27418,7 +27439,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint16x4_t __ret; \ uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \ __ret; \ }) #else @@ -27426,16 +27447,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p uint16x4_t __ret; \ uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); 
\ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vset_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __ret; \ uint16_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27445,7 +27466,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int8x8_t __ret; \ int8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \ __ret; \ }) #else @@ -27453,16 +27474,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int8x8_t __ret; \ int8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vset_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __ret; \ int8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vset_lane_i8(__s0, (int8x8_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vset_lane_i8(__s0, __builtin_bit_cast(int8x8_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27472,7 +27493,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p float32x2_t __ret; \ float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vset_lane_f32(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -27480,16 +27501,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p float32x2_t __ret; \ float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vset_lane_f32(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vset_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __ret; \ float32_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ - __ret = (float32x2_t) __builtin_neon_vset_lane_f32(__s0, (float32x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vset_lane_f32(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -27499,7 +27520,7 @@ __ai 
__attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int32x2_t __ret; \ int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vset_lane_i32(__s0, __builtin_bit_cast(int32x2_t, __s1), __p2)); \ __ret; \ }) #else @@ -27507,16 +27528,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int32x2_t __ret; \ int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vset_lane_i32(__s0, __builtin_bit_cast(int32x2_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vset_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __ret; \ int32_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vset_lane_i32(__s0, (int32x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vset_lane_i32(__s0, __builtin_bit_cast(int32x2_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27525,7 +27546,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int64x1_t __ret; \ int64_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vset_lane_i64(__s0, (int64x1_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vset_lane_i64(__s0, __builtin_bit_cast(int64x1_t, __s1), __p2)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -27533,7 +27554,7 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int16x4_t __ret; \ int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \ __ret; \ }) #else @@ -27541,16 +27562,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p int16x4_t __ret; \ int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __rev1), __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vset_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __ret; \ int16_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vset_lane_i16(__s0, (int16x4_t)__s1, __p2); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vset_lane_i16(__s0, __builtin_bit_cast(int16x4_t, __s1), __p2)); \ __ret; \ }) #endif @@ -27558,16 +27579,16 @@ __ai __attribute__((target("neon"))) int8x8_t __noswap_vrsubhn_s16(int16x8_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) 
__builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -27575,16 +27596,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vshlq_u8(uint8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -27592,16 +27613,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vshlq_u32(uint32x4_t __p0, int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -27609,16 +27630,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vshlq_u64(uint64x2_t __p0, int64 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -27626,16 +27647,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vshlq_u16(uint16x8_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -27643,16 +27664,16 @@ __ai __attribute__((target("neon"))) int8x16_t vshlq_s8(int8x16_t __p0, int8x16_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), 
__builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -27660,16 +27681,16 @@ __ai __attribute__((target("neon"))) int32x4_t vshlq_s32(int32x4_t __p0, int32x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -27677,16 +27698,16 @@ __ai __attribute__((target("neon"))) int64x2_t vshlq_s64(int64x2_t __p0, int64x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshlq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } 
#endif @@ -27694,16 +27715,16 @@ __ai __attribute__((target("neon"))) int16x8_t vshlq_s16(int16x8_t __p0, int16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -27711,38 +27732,38 @@ __ai __attribute__((target("neon"))) uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); 
- int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -27750,16 +27771,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vshl_u16(uint16x4_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -27767,38 +27788,38 @@ __ai __attribute__((target("neon"))) int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, 
__p1), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -27807,16 +27828,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #else #define vshlq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -27825,16 +27846,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define vshlq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -27843,16 +27864,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t 
__ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define vshlq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -27861,16 +27882,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vshlq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -27879,16 +27900,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #else #define vshlq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -27897,16 +27918,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define vshlq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - 
int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -27915,16 +27936,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define vshlq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -27933,16 +27954,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define vshlq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vshlq_n_v((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshlq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -27951,16 +27972,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vshl_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 16)); \ + __ret 
= __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -27969,16 +27990,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vshl_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -27986,23 +28007,23 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshl_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vshl_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -28011,16 +28032,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vshl_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -28029,16 +28050,16 @@ __ai __attribute__((target("neon"))) int16x4_t 
vshl_s16(int16x4_t __p0, int16x4_ #define vshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vshl_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -28046,23 +28067,23 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshl_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vshl_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vshl_n_v((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshl_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -28071,22 +28092,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshll_n_u8(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vshll_n_u8(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vshll_n_u8(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, 
__builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 49)); \ __ret; \ }) #endif @@ -28095,22 +28116,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshll_n_u32(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 51)); \ __ret; \ }) #else #define vshll_n_u32(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vshll_n_u32(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 51)); \ __ret; \ }) #endif @@ -28119,22 +28140,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshll_n_u16(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 50)); \ __ret; \ }) #else #define vshll_n_u16(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_vshll_n_u16(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 50)); \ __ret; \ }) #endif @@ -28143,22 +28164,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshll_n_s8(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 33)); \ __ret; \ }) #else #define vshll_n_s8(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vshll_n_s8(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 33)); \ __ret; \ }) #endif @@ -28167,22 +28188,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshll_n_s32(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 35)); \ __ret; \ }) #else #define vshll_n_s32(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vshll_n_s32(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 35)); \ __ret; \ }) #endif @@ -28191,22 +28212,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshll_n_s16(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 34)); \ __ret; \ }) #else #define vshll_n_s16(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_vshll_n_s16(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshll_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 34)); \ __ret; \ }) #endif @@ -28215,16 +28236,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, 
__builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 48)); \ __ret; \ }) #else #define vshrq_n_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -28233,16 +28254,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 50)); \ __ret; \ }) #else #define vshrq_n_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -28251,16 +28272,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define vshrq_n_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -28269,16 +28290,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 49)); \ __ret; \ }) #else #define vshrq_n_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 
49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -28287,16 +28308,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 32)); \ __ret; \ }) #else #define vshrq_n_s8(__p0, __p1) __extension__ ({ \ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -28305,16 +28326,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 34)); \ __ret; \ }) #else #define vshrq_n_s32(__p0, __p1) __extension__ ({ \ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -28323,16 +28344,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define vshrq_n_s64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ 
-28341,16 +28362,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, __p1, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 33)); \ __ret; \ }) #else #define vshrq_n_s16(__p0, __p1) __extension__ ({ \ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vshrq_n_v((int8x16_t)__rev0, __p1, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vshrq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -28359,16 +28380,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vshr_n_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -28377,16 +28398,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshr_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vshr_n_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -28394,23 +28415,23 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshr_n_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define 
vshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vshr_n_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -28419,16 +28440,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vshr_n_s8(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -28437,16 +28458,16 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vshr_n_s32(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -28454,23 +28475,23 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshr_n_s64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__s0, __p1, 1); \ + __ret = 
__builtin_bit_cast(int16x4_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vshr_n_s16(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vshr_n_v((int8x8_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshr_n_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -28479,22 +28500,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #else #define vshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vshrn_n_u32(__p0, __p1) __extension__ ({ \ uint16x4_t __ret; \ uint32x4_t __s0 = __p0; \ - __ret = (uint16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 17)); \ __ret; \ }) #endif @@ -28503,22 +28524,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #else #define vshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vshrn_n_u64(__p0, __p1) __extension__ ({ \ uint32x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (uint32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 18)); \ __ret; \ }) #endif @@ -28527,22 +28548,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define 
vshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #else #define vshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vshrn_n_u16(__p0, __p1) __extension__ ({ \ uint8x8_t __ret; \ uint16x8_t __s0 = __p0; \ - __ret = (uint8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 16)); \ __ret; \ }) #endif @@ -28551,22 +28572,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #else #define vshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vshrn_n_s32(__p0, __p1) __extension__ ({ \ int16x4_t __ret; \ int32x4_t __s0 = __p0; \ - __ret = (int16x4_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 1)); \ __ret; \ }) #endif @@ -28575,22 +28596,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #else #define vshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 2)); \ + __ret = __builtin_shufflevector(__ret, 
__ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vshrn_n_s64(__p0, __p1) __extension__ ({ \ int32x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (int32x2_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 2)); \ __ret; \ }) #endif @@ -28599,22 +28620,22 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #define vshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #else #define vshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__rev0, __p1, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #define __noswap_vshrn_n_s16(__p0, __p1) __extension__ ({ \ int8x8_t __ret; \ int16x8_t __s0 = __p0; \ - __ret = (int8x8_t) __builtin_neon_vshrn_n_v((int8x16_t)__s0, __p1, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vshrn_n_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 0)); \ __ret; \ }) #endif @@ -28624,7 +28645,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 4)); \ __ret; \ }) #else @@ -28632,10 +28653,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -28645,7 +28666,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 5)); \ __ret; \ }) #else @@ -28653,10 +28674,10 @@ __ai 
__attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (poly16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -28666,7 +28687,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -28674,10 +28695,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -28687,7 +28708,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -28695,10 +28716,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 
__lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -28708,7 +28729,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -28716,10 +28737,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -28729,7 +28750,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 50)); \ __ret; \ }) #else @@ -28737,10 +28758,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -28750,7 +28771,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), 
__builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else @@ -28758,10 +28779,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -28771,7 +28792,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -28779,10 +28800,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -28792,7 +28813,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -28800,10 +28821,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 
__lane_reverse_128_8); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -28813,7 +28834,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else @@ -28821,10 +28842,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -28834,7 +28855,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 35)); \ __ret; \ }) #else @@ -28842,10 +28863,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -28855,7 +28876,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -28863,10 +28884,10 @@ __ai 
__attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -28876,7 +28897,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 16)); \ __ret; \ }) #else @@ -28884,10 +28905,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -28897,7 +28918,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 18)); \ __ret; \ }) #else @@ -28905,10 +28926,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, 
__rev1), __p2, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -28917,7 +28938,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -28925,7 +28946,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 17)); \ __ret; \ }) #else @@ -28933,10 +28954,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -28946,7 +28967,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 0)); \ __ret; \ }) #else @@ -28954,10 +28975,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -28967,7 +28988,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, 
(int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -28975,10 +28996,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -28987,7 +29008,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -28995,7 +29016,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -29003,10 +29024,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vsli_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -29016,7 +29037,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -29024,10 +29045,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -29037,7 +29058,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 50)); \ __ret; \ }) #else @@ -29045,10 +29066,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -29058,7 +29079,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else @@ -29066,10 +29087,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_64); \ __ret; \ }) #endif @@ -29079,7 +29100,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -29087,10 +29108,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -29100,7 +29121,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -29108,10 +29129,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -29121,7 +29142,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else @@ -29129,10 +29150,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t 
__s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -29142,7 +29163,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 35)); \ __ret; \ }) #else @@ -29150,10 +29171,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -29163,7 +29184,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -29171,10 +29192,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vsraq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsraq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -29184,7 +29205,7 @@ __ai 
__attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 16)); \ __ret; \ }) #else @@ -29192,10 +29213,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -29205,7 +29226,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 18)); \ __ret; \ }) #else @@ -29213,10 +29234,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -29225,7 +29246,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -29233,7 +29254,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), 
__builtin_bit_cast(int8x8_t, __s1), __p2, 17)); \ __ret; \ }) #else @@ -29241,10 +29262,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -29254,7 +29275,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 0)); \ __ret; \ }) #else @@ -29262,10 +29283,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -29275,7 +29296,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -29283,10 +29304,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), 
__builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -29295,7 +29316,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -29303,7 +29324,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -29311,10 +29332,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vsra_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsra_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -29324,7 +29345,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 4); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 4)); \ __ret; \ }) #else @@ -29332,10 +29353,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x8_t __ret; \ poly8x8_t __s0 = __p0; \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -29345,7 +29366,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - __ret = (poly16x4_t) 
__builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 5); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 5)); \ __ret; \ }) #else @@ -29353,10 +29374,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x4_t __ret; \ poly16x4_t __s0 = __p0; \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (poly16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 5); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16x4_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 5)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -29366,7 +29387,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -29374,10 +29395,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -29387,7 +29408,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -29395,10 +29416,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, 
(int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -29408,7 +29429,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -29416,10 +29437,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -29429,7 +29450,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 50)); \ __ret; \ }) #else @@ -29437,10 +29458,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x4_t __ret; \ uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -29450,7 +29471,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ 
uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else @@ -29458,10 +29479,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -29471,7 +29492,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -29479,10 +29500,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -29492,7 +29513,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -29500,10 +29521,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x16_t __ret; \ int8x16_t __s0 = __p0; \ int8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) 
__builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -29513,7 +29534,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else @@ -29521,10 +29542,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -29534,7 +29555,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 35)); \ __ret; \ }) #else @@ -29542,10 +29563,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x2_t __ret; \ int64x2_t __s0 = __p0; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -29555,7 +29576,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) 
__builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -29563,10 +29584,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -29576,7 +29597,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 16); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 16)); \ __ret; \ }) #else @@ -29584,10 +29605,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint8x8_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 16); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 16)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ __ret; \ }) #endif @@ -29597,7 +29618,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 18); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 18)); \ __ret; \ }) #else @@ -29605,10 +29626,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint32x2_t __ret; \ uint32x2_t __s0 = __p0; \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 18); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 18)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -29617,7 +29638,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint64x1_t __ret; \ uint64x1_t __s0 = __p0; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 19)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -29625,7 +29646,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - __ret = (uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 17); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 17)); \ __ret; \ }) #else @@ -29633,10 +29654,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ uint16x4_t __ret; \ uint16x4_t __s0 = __p0; \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (uint16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 17); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 17)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -29646,7 +29667,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 0); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 0)); \ __ret; \ }) #else @@ -29654,10 +29675,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int8x8_t __ret; \ int8x8_t __s0 = __p0; \ int8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x8_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 0); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 0)); \ + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_64_8); \ __ret; \ }) #endif @@ -29667,7 +29688,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -29675,10 +29696,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -29687,7 +29708,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int64x1_t __ret; \ int64x1_t __s0 = __p0; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -29695,7 +29716,7 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -29703,10 +29724,10 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vsri_n_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -29714,3128 +29735,3128 @@ __ai __attribute__((target("neon"))) int16x4_t vshl_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ #define vst1_p8(__p0, __p1) __extension__ ({ \ poly8x8_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 4); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 4); \ }) #else #define vst1_p8(__p0, __p1) 
__extension__ ({ \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 4); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 5); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 5); \ }) #else #define vst1_p16(__p0, __p1) __extension__ ({ \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 5); \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 36); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 36); \ }) #else #define vst1q_p8(__p0, __p1) __extension__ ({ \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 36); \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 37); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 37); \ }) #else #define vst1q_p16(__p0, __p1) __extension__ ({ \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 37); \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 48); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 48); \ }) #else #define vst1q_u8(__p0, __p1) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 48); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 50); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 50); \ }) #else #define vst1q_u32(__p0, __p1) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 50); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __builtin_neon_vst1q_v(__p0, 
__builtin_bit_cast(int8x16_t, __rev1), 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 51); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 51); \ }) #else #define vst1q_u64(__p0, __p1) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 51); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 49); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 49); \ }) #else #define vst1q_u16(__p0, __p1) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 49); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 32); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 32); \ }) #else #define vst1q_s8(__p0, __p1) __extension__ ({ \ int8x16_t __s1 = __p1; \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 32); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32(__p0, __p1) __extension__ ({ \ float32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 41); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 41); \ }) #else #define vst1q_f32(__p0, __p1) __extension__ ({ \ float32x4_t __s1 = __p1; \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 41); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 34); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 34); \ }) #else #define vst1q_s32(__p0, __p1) __extension__ ({ \ int32x4_t __s1 = __p1; \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 34); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 35); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 35); \ }) #else #define vst1q_s64(__p0, __p1) __extension__ ({ \ int64x2_t __s1 
= __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 35); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 33); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 33); \ }) #else #define vst1q_s16(__p0, __p1) __extension__ ({ \ int16x8_t __s1 = __p1; \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 33); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 16); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 16); \ }) #else #define vst1_u8(__p0, __p1) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 18); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 18); \ }) #else #define vst1_u32(__p0, __p1) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 18); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 18); \ }) #endif #define vst1_u64(__p0, __p1) __extension__ ({ \ uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 19); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 17); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 17); \ }) #else #define vst1_u16(__p0, __p1) __extension__ ({ \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 17); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 0); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 0); \ }) #else #define vst1_s8(__p0, __p1) __extension__ ({ \ int8x8_t __s1 = __p1; \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 0); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + 
__builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32(__p0, __p1) __extension__ ({ \ float32x2_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 9); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 9); \ }) #else #define vst1_f32(__p0, __p1) __extension__ ({ \ float32x2_t __s1 = __p1; \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 9); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 2); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 2); \ }) #else #define vst1_s32(__p0, __p1) __extension__ ({ \ int32x2_t __s1 = __p1; \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 2); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 2); \ }) #endif #define vst1_s64(__p0, __p1) __extension__ ({ \ int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 3); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 1); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 1); \ }) #else #define vst1_s16(__p0, __p1) __extension__ ({ \ int16x4_t __s1 = __p1; \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 1); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 4); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 4); \ }) #else #define vst1_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8_t __s1 = __p1; \ - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 4); \ + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 5); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 5); \ }) #else #define vst1_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4_t __s1 = __p1; \ - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 5); \ + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_p8(__p0, __p1, __p2) 
__extension__ ({ \ poly8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 36); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 36); \ }) #else #define vst1q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16_t __s1 = __p1; \ - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 36); \ + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 37); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 37); \ }) #else #define vst1q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8_t __s1 = __p1; \ - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 37); \ + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 48); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 48); \ }) #else #define vst1q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 48); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 50); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 50); \ }) #else #define vst1q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4_t __s1 = __p1; \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 50); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 51); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 51); \ }) #else #define vst1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 51); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 49); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 49); \ }) #else #define vst1q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8_t __s1 = __p1; \ - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 49); \ + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 32); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 32); \ }) #else #define vst1q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16_t __s1 = __p1; \ - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 32); \ + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 41); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 41); \ }) #else #define vst1q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4_t __s1 = __p1; \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 41); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 34); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 34); \ }) #else #define vst1q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4_t __s1 = __p1; \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 34); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 35); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 35); \ }) #else #define vst1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 35); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_s16(__p0, __p1, __p2) 
__extension__ ({ \ int16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 33); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 33); \ }) #else #define vst1q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8_t __s1 = __p1; \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 33); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 16); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 16); \ }) #else #define vst1_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 18); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 18); \ }) #else #define vst1_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2_t __s1 = __p1; \ - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 18); \ + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 18); \ }) #endif #define vst1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 19); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 17); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 17); \ }) #else #define vst1_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4_t __s1 = __p1; \ - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 17); \ + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 0); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 0); \ }) #else #define vst1_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8_t __s1 = __p1; \ - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 0); \ + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __builtin_neon_vst1_lane_v(__p0, 
__builtin_bit_cast(int8x8_t, __rev1), __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 9); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 9); \ }) #else #define vst1_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2_t __s1 = __p1; \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 9); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 2); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 2); \ }) #else #define vst1_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2_t __s1 = __p1; \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 2); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 2); \ }) #endif #define vst1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 3); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 1); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 1); \ }) #else #define vst1_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4_t __s1 = __p1; \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 1); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p8_x2(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 4); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 4); \ }) #else #define vst1_p8_x2(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16_x2(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 5); \ + 
__builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 5); \ }) #else #define vst1_p16_x2(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 36); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 36); \ }) #else #define vst1q_p8_x2(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 37); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 37); \ }) #else #define vst1q_p16_x2(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 48); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 48); \ }) #else #define vst1q_u8_x2(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 50); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 50); \ }) #else #define vst1q_u32_x2(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 51); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 51); \ }) #else #define vst1q_u64_x2(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 49); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 49); \ }) #else #define vst1q_u16_x2(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), 
__builtin_bit_cast(int8x16_t, __rev1.val[1]), 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 32); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 32); \ }) #else #define vst1q_s8_x2(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 41); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 41); \ }) #else #define vst1q_f32_x2(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 34); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 34); \ }) #else #define vst1q_s32_x2(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x2(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 35); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 35); \ }) #else #define 
vst1q_s64_x2(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 33); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 33); \ }) #else #define vst1q_s16_x2(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8_x2(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 16); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 16); \ }) #else #define vst1_u8_x2(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32_x2(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 18); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 18); \ }) #else #define vst1_u32_x2(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 18); \ }) #endif #define vst1_u64_x2(__p0, __p1) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x2(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 17); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 17); \ }) #else #define vst1_u16_x2(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8_x2(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 0); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 0); \ }) #else #define vst1_s8_x2(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x2(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 9); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 9); \ }) #else #define vst1_f32_x2(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 9); \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vst1_s32_x2(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 2); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 2); \ }) #else #define vst1_s32_x2(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 2); \ }) #endif #define vst1_s64_x2(__p0, __p1) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 3); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x2(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 1); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 1); \ }) #else #define vst1_s16_x2(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p8_x3(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 4); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 4); \ }) #else #define vst1_p8_x3(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, 
__rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16_x3(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 5); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 5); \ }) #else #define vst1_p16_x3(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 36); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 36); \ }) #else #define vst1q_p8_x3(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 37); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 37); \ }) #else #define vst1q_p16_x3(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - 
__rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 48); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 48); \ }) #else #define vst1q_u8_x3(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 50); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 50); \ }) #else #define vst1q_u32_x3(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64_x3(__p0, __p1) 
__extension__ ({ \ uint64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 51); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 51); \ }) #else #define vst1q_u64_x3(__p0, __p1) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 49); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 49); \ }) #else #define vst1q_u16_x3(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 32); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 32); \ }) #else #define vst1q_s8_x3(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, 
(int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 41); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 41); \ }) #else #define vst1q_f32_x3(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 34); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 34); \ }) #else #define vst1q_s32_x3(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 35); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), 
__builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 35); \ }) #else #define vst1q_s64_x3(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 33); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 33); \ }) #else #define vst1q_s16_x3(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8_x3(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 16); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 16); \ }) #else #define vst1_u8_x3(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 
__lane_reverse_64_8); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32_x3(__p0, __p1) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 18); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 18); \ }) #else #define vst1_u32_x3(__p0, __p1) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 18); \ }) #endif #define vst1_u64_x3(__p0, __p1) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x3(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 17); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 17); \ }) #else #define vst1_u16_x3(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8_x3(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 0); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, 
__s1.val[2]), 0); \ }) #else #define vst1_s8_x3(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x3(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 9); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 9); \ }) #else #define vst1_f32_x3(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x3(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 2); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 2); \ }) #else #define vst1_s32_x3(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, 
__rev1.val[2]), 2); \ }) #endif #define vst1_s64_x3(__p0, __p1) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 3); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x3(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 1); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 1); \ }) #else #define vst1_s16_x3(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p8_x4(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 4); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 4); \ }) #else #define vst1_p8_x4(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ poly8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_p16_x4(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 
(int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 5); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 5); \ }) #else #define vst1_p16_x4(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p8_x4(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 36); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 36); \ }) #else #define vst1q_p8_x4(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_p16_x4(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 37); \ + __builtin_neon_vst1q_x4_v(__p0, 
__builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 37); \ }) #else #define vst1q_p16_x4(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u8_x4(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 48); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 48); \ }) #else #define vst1q_u8_x4(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u32_x4(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 50); \ + __builtin_neon_vst1q_x4_v(__p0, 
__builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 50); \ }) #else #define vst1q_u32_x4(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u64_x4(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 51); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 51); \ }) #else #define vst1q_u64_x4(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_u16_x4(__p0, __p1) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 49); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 49); \ }) #else #define vst1q_u16_x4(__p0, __p1) __extension__ ({ \ uint16x8x4_t 
__s1 = __p1; \ uint16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s8_x4(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 32); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 32); \ }) #else #define vst1q_s8_x4(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 41); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 41); \ }) #else #define vst1q_f32_x4(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ 
float32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 34); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 34); \ }) #else #define vst1q_s32_x4(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 35); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 35); \ }) #else #define vst1q_s64_x4(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], 
__s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 35); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 33); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 33); \ }) #else #define vst1q_s16_x4(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u8_x4(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 16); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 16); \ }) #else #define vst1_u8_x4(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], 
(int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_u32_x4(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 18); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 18); \ }) #else #define vst1_u32_x4(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 18); \ }) #endif #define vst1_u64_x4(__p0, __p1) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_u16_x4(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 17); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 17); \ }) #else #define vst1_u16_x4(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = 
__builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s8_x4(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 0); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 0); \ }) #else #define vst1_s8_x4(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ int8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f32_x4(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 9); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 9); \ }) #else #define vst1_f32_x4(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 
__lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_s32_x4(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 2); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 2); \ }) #else #define vst1_s32_x4(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 2); \ }) #endif #define vst1_s64_x4(__p0, __p1) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 3); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1_s16_x4(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 1); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 1); \ }) #else #define vst1_s16_x4(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 
(int8x8_t)__rev1.val[3], 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_p8(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 4); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 4); \ }) #else #define vst2_p8(__p0, __p1) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_p16(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 5); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 5); \ }) #else #define vst2_p16(__p0, __p1) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_p8(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 36); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 36); \ }) #else #define vst2q_p8(__p0, __p1) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 
__lane_reverse_128_8); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_p16(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 37); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 37); \ }) #else #define vst2q_p16(__p0, __p1) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_u8(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 48); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 48); \ }) #else #define vst2q_u8(__p0, __p1) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_u32(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 50); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 50); \ }) #else #define vst2q_u32(__p0, __p1) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_u16(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 49); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), 
__builtin_bit_cast(int8x16_t, __s1.val[1]), 49); \ }) #else #define vst2q_u16(__p0, __p1) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s8(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 32); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 32); \ }) #else #define vst2q_s8(__p0, __p1) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_f32(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 41); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 41); \ }) #else #define vst2q_f32(__p0, __p1) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s32(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 34); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 34); \ }) #else #define vst2q_s32(__p0, __p1) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 34); \ + __rev1.val[0] 
= __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s16(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 33); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 33); \ }) #else #define vst2q_s16(__p0, __p1) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_u8(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 16); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 16); \ }) #else #define vst2_u8(__p0, __p1) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_u32(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 18); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 18); \ }) #else #define vst2_u32(__p0, __p1) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 18); \ }) #endif #define vst2_u64(__p0, __p1) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 19); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, 
__s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2_u16(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 17); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 17); \ }) #else #define vst2_u16(__p0, __p1) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_s8(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 0); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 0); \ }) #else #define vst2_s8(__p0, __p1) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_f32(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 9); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 9); \ }) #else #define vst2_f32(__p0, __p1) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_s32(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 2); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 2); \ }) #else #define vst2_s32(__p0, __p1) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 2); \ }) #endif #define vst2_s64(__p0, __p1) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 3); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2_s16(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 1); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 1); \ }) #else #define vst2_s16(__p0, __p1) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 4); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 4); \ }) #else #define vst2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x2_t __s1 = __p1; \ poly8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 5); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 5); \ }) #else #define vst2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x2_t __s1 = __p1; \ poly16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 5); \ + __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 37); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 37); \ }) #else #define vst2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x2_t __s1 = __p1; \ poly16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 50); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 50); \ }) #else #define vst2q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x2_t __s1 = __p1; \ uint32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 49); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 49); \ }) #else #define vst2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x2_t __s1 = __p1; \ uint16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, 
__rev1.val[1]), __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 41); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 41); \ }) #else #define vst2q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x2_t __s1 = __p1; \ float32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 34); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 34); \ }) #else #define vst2q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x2_t __s1 = __p1; \ int32x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 33); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 33); \ }) #else #define vst2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x2_t __s1 = __p1; \ int16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 16); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, 
__s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 16); \ }) #else #define vst2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x2_t __s1 = __p1; \ uint8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 18); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 18); \ }) #else #define vst2_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x2_t __s1 = __p1; \ uint32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 17); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 17); \ }) #else #define vst2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x2_t __s1 = __p1; \ uint16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 0); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 0); \ }) #else #define vst2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x2_t __s1 = __p1; \ int8x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - 
__builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 9); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 9); \ }) #else #define vst2_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x2_t __s1 = __p1; \ float32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 2); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 2); \ }) #else #define vst2_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x2_t __s1 = __p1; \ int32x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 1); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 1); \ }) #else #define vst2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x2_t __s1 = __p1; \ int16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 1); \ }) 
#endif #ifdef __LITTLE_ENDIAN__ #define vst3_p8(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 4); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 4); \ }) #else #define vst3_p8(__p0, __p1) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_p16(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 5); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 5); \ }) #else #define vst3_p16(__p0, __p1) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_p8(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 36); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 36); \ }) #else #define vst3q_p8(__p0, __p1) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], 
(int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_p16(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 37); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 37); \ }) #else #define vst3q_p16(__p0, __p1) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u8(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 48); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 48); \ }) #else #define vst3q_u8(__p0, __p1) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u32(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 50); \ + 
__builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 50); \ }) #else #define vst3q_u32(__p0, __p1) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u16(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 49); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 49); \ }) #else #define vst3q_u16(__p0, __p1) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s8(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 32); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 32); \ }) #else #define vst3q_s8(__p0, __p1) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + 
__rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_f32(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 41); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 41); \ }) #else #define vst3q_f32(__p0, __p1) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s32(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 34); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 34); \ }) #else #define vst3q_s32(__p0, __p1) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s16(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 33); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 33); \ }) #else #define vst3q_s16(__p0, __p1) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ - __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_u8(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 16); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 16); \ }) #else #define vst3_u8(__p0, __p1) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_u32(__p0, __p1) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 18); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 18); \ }) #else #define vst3_u32(__p0, __p1) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 18); \ }) #endif #define vst3_u64(__p0, __p1) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ - 
__builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 19); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3_u16(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 17); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 17); \ }) #else #define vst3_u16(__p0, __p1) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_s8(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 0); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 0); \ }) #else #define vst3_s8(__p0, __p1) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_f32(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 9); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 9); \ }) #else #define vst3_f32(__p0, __p1) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_s32(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 2); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 2); \ }) #else #define vst3_s32(__p0, __p1) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 2); \ }) #endif #define vst3_s64(__p0, __p1) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 3); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3_s16(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 1); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 1); \ }) #else #define vst3_s16(__p0, __p1) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 1); \ }) #endif 
#ifdef __LITTLE_ENDIAN__ #define vst3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 4); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 4); \ }) #else #define vst3_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x3_t __s1 = __p1; \ poly8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 5); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 5); \ }) #else #define vst3_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x3_t __s1 = __p1; \ poly16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 37); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 37); \ }) #else #define vst3q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x3_t __s1 = __p1; \ poly16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 
6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 50); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 50); \ }) #else #define vst3q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x3_t __s1 = __p1; \ uint32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 49); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 49); \ }) #else #define vst3q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x3_t __s1 = __p1; \ uint16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __s1 = 
__p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 41); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 41); \ }) #else #define vst3q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x3_t __s1 = __p1; \ float32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 34); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 34); \ }) #else #define vst3q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x3_t __s1 = __p1; \ int32x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 33); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 33); \ }) #else #define vst3q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x3_t __s1 = __p1; \ int16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], 
(int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 16); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 16); \ }) #else #define vst3_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x3_t __s1 = __p1; \ uint8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 18); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 18); \ }) #else #define vst3_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x3_t __s1 = __p1; \ uint32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 17); \ + 
__builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 17); \ }) #else #define vst3_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x3_t __s1 = __p1; \ uint16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 0); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 0); \ }) #else #define vst3_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x3_t __s1 = __p1; \ int8x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 9); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 9); \ }) #else #define vst3_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x3_t __s1 = __p1; \ float32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], 
__s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 2); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 2); \ }) #else #define vst3_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x3_t __s1 = __p1; \ int32x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 1); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 1); \ }) #else #define vst3_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x3_t __s1 = __p1; \ int16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_p8(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 4); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 4); \ }) #else #define vst4_p8(__p0, __p1) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ 
poly8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_p16(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 5); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 5); \ }) #else #define vst4_p16(__p0, __p1) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_p8(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 36); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 36); \ }) #else #define vst4q_p8(__p0, __p1) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - 
__rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_p16(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 37); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 37); \ }) #else #define vst4q_p16(__p0, __p1) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u8(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 48); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 48); \ }) #else #define vst4q_u8(__p0, __p1) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], 
__s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u32(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 50); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 50); \ }) #else #define vst4q_u32(__p0, __p1) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u16(__p0, __p1) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 49); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 49); \ }) #else #define vst4q_u16(__p0, __p1) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ uint16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, 
(int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s8(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 32); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 32); \ }) #else #define vst4q_s8(__p0, __p1) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_f32(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 41); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 41); \ }) #else #define vst4q_f32(__p0, __p1) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 41); \ 
+ __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s32(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 34); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 34); \ }) #else #define vst4q_s32(__p0, __p1) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s16(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 33); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 33); \ }) #else #define vst4q_s16(__p0, __p1) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_u8(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 16); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 16); \ }) #else #define vst4_u8(__p0, __p1) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_u32(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 18); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 18); \ }) #else #define vst4_u32(__p0, __p1) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 
__builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 18); \ }) #endif #define vst4_u64(__p0, __p1) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 19); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 19); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4_u16(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 17); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 17); \ }) #else #define vst4_u16(__p0, __p1) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_s8(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 0); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 0); \ }) #else #define vst4_s8(__p0, __p1) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ int8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 
__lane_reverse_64_8); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_f32(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 9); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 9); \ }) #else #define vst4_f32(__p0, __p1) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_s32(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 2); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 2); \ }) #else #define vst4_s32(__p0, __p1) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 2); \ }) #endif #define vst4_s64(__p0, __p1) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], 
(int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 3); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 3); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4_s16(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 1); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 1); \ }) #else #define vst4_s16(__p0, __p1) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 1); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 4); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 4); \ }) #else #define vst4_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x8x4_t __s1 = __p1; \ poly8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 4); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 
__builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 4); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 5); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 5); \ }) #else #define vst4_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x4x4_t __s1 = __p1; \ poly16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 5); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 5); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 37); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 37); \ }) #else #define vst4q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ poly16x8x4_t __s1 = __p1; \ poly16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 37); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 37); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u32(__p0, 
__p1, __p2) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 50); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 50); \ }) #else #define vst4q_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x4x4_t __s1 = __p1; \ uint32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 50); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 50); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 49); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 49); \ }) #else #define vst4q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x8x4_t __s1 = __p1; \ uint16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 49); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 49); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ - 
__builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 41); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 41); \ }) #else #define vst4q_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x4x4_t __s1 = __p1; \ float32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 41); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 41); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 34); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 34); \ }) #else #define vst4q_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x4x4_t __s1 = __p1; \ int32x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 34); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_32); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 34); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 33); \ 
+ __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 33); \ }) #else #define vst4q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x8x4_t __s1 = __p1; \ int16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 33); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 33); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 16); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 16); \ }) #else #define vst4_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x8x4_t __s1 = __p1; \ uint8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 16); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 16); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 18); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), 
__builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 18); \ }) #else #define vst4_lane_u32(__p0, __p1, __p2) __extension__ ({ \ uint32x2x4_t __s1 = __p1; \ uint32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 18); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 18); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 17); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 17); \ }) #else #define vst4_lane_u16(__p0, __p1, __p2) __extension__ ({ \ uint16x4x4_t __s1 = __p1; \ uint16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 17); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 17); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 0); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 0); \ }) #else #define vst4_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x8x4_t 
__s1 = __p1; \ int8x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 0); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 0); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 9); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 9); \ }) #else #define vst4_lane_f32(__p0, __p1, __p2) __extension__ ({ \ float32x2x4_t __s1 = __p1; \ float32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 9); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 9); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 2); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 2); \ }) #else #define vst4_lane_s32(__p0, __p1, __p2) __extension__ ({ \ int32x2x4_t __s1 = __p1; \ int32x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 
1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 2); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_32); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_32); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_32); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_32); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 2); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 1); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 1); \ }) #else #define vst4_lane_s16(__p0, __p1, __p2) __extension__ ({ \ int16x4x4_t __s1 = __p1; \ int16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 1); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 1); \ }) #endif @@ -32848,10 +32869,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vsubq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -32865,10 +32886,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vsubq_u32(uint32x4_t 
__p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -32882,10 +32903,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vsubq_u64(uint64x2_t __p0, uint6 #else __ai __attribute__((target("neon"))) uint64x2_t vsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -32899,10 +32920,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vsubq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -32916,10 +32937,10 @@ __ai __attribute__((target("neon"))) int8x16_t vsubq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -32933,10 +32954,10 @@ __ai __attribute__((target("neon"))) float32x4_t vsubq_f32(float32x4_t __p0, flo #else __ai __attribute__((target("neon"))) float32x4_t vsubq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
__p1, __lane_reverse_128_32); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -32950,10 +32971,10 @@ __ai __attribute__((target("neon"))) int32x4_t vsubq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -32967,10 +32988,10 @@ __ai __attribute__((target("neon"))) int64x2_t vsubq_s64(int64x2_t __p0, int64x2 #else __ai __attribute__((target("neon"))) int64x2_t vsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -32984,10 +33005,10 @@ __ai __attribute__((target("neon"))) int16x8_t vsubq_s16(int16x8_t __p0, int16x8 #else __ai __attribute__((target("neon"))) int16x8_t vsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -33001,10 +33022,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vsub_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33018,10 +33039,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vsub_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -33040,10 +33061,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vsub_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -33057,10 +33078,10 @@ __ai __attribute__((target("neon"))) int8x8_t vsub_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33074,10 +33095,10 @@ __ai __attribute__((target("neon"))) float32x2_t vsub_f32(float32x2_t __p0, floa #else __ai __attribute__((target("neon"))) float32x2_t vsub_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -33091,10 +33112,10 @@ __ai __attribute__((target("neon"))) int32x2_t vsub_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -33113,10 +33134,10 @@ __ai __attribute__((target("neon"))) int16x4_t vsub_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; 
- int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -33124,21 +33145,21 @@ __ai __attribute__((target("neon"))) int16x4_t vsub_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t __noswap_vsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 17)); return __ret; } #endif @@ -33146,21 +33167,21 @@ __ai __attribute__((target("neon"))) uint16x4_t __noswap_vsubhn_u32(uint32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai 
__attribute__((target("neon"))) uint32x2_t __noswap_vsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 18)); return __ret; } #endif @@ -33168,21 +33189,21 @@ __ai __attribute__((target("neon"))) uint32x2_t __noswap_vsubhn_u64(uint64x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t __noswap_vsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 16)); return __ret; } #endif @@ -33190,21 +33211,21 @@ __ai __attribute__((target("neon"))) uint8x8_t __noswap_vsubhn_u16(uint16x8_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #else __ai __attribute__((target("neon"))) int16x4_t vsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) int16x4_t __noswap_vsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; - __ret = 
(int16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 1)); return __ret; } #endif @@ -33212,21 +33233,21 @@ __ai __attribute__((target("neon"))) int16x4_t __noswap_vsubhn_s32(int32x4_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) int32x2_t __noswap_vsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 2)); return __ret; } #endif @@ -33234,21 +33255,21 @@ __ai __attribute__((target("neon"))) int32x2_t __noswap_vsubhn_s64(int64x2_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__rev0, (int8x16_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } __ai __attribute__((target("neon"))) int8x8_t __noswap_vsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vsubhn_v(__builtin_bit_cast(int8x16_t, __p0), 
__builtin_bit_cast(int8x16_t, __p1), 0)); return __ret; } #endif @@ -33262,10 +33283,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vsubl_u8(uint8x8_t __p0, uint8x8 #else __ai __attribute__((target("neon"))) uint16x8_t vsubl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __noswap_vmovl_u8(__rev0) - __noswap_vmovl_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -33279,10 +33300,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vsubl_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint64x2_t vsubl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vmovl_u32(__rev0) - __noswap_vmovl_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -33296,10 +33317,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vsubl_u16(uint16x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint32x4_t vsubl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __noswap_vmovl_u16(__rev0) - __noswap_vmovl_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -33313,10 +33334,10 @@ __ai __attribute__((target("neon"))) int16x8_t vsubl_s8(int8x8_t __p0, int8x8_t #else __ai __attribute__((target("neon"))) int16x8_t vsubl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __noswap_vmovl_s8(__rev0) - __noswap_vmovl_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -33330,10 +33351,10 @@ __ai __attribute__((target("neon"))) int64x2_t vsubl_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int64x2_t vsubl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vmovl_s32(__rev0) - __noswap_vmovl_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -33347,10 +33368,10 @@ __ai __attribute__((target("neon"))) int32x4_t vsubl_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int32x4_t vsubl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __noswap_vmovl_s16(__rev0) - __noswap_vmovl_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -33364,10 +33385,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vsubw_u8(uint16x8_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint16x8_t vsubw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 - __noswap_vmovl_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -33381,10 +33402,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vsubw_u32(uint64x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint64x2_t vsubw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __noswap_vmovl_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -33398,10 +33419,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vsubw_u16(uint32x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint32x4_t vsubw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __noswap_vmovl_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -33415,10 +33436,10 @@ __ai 
__attribute__((target("neon"))) int16x8_t vsubw_s8(int16x8_t __p0, int8x8_t #else __ai __attribute__((target("neon"))) int16x8_t vsubw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 - __noswap_vmovl_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -33432,10 +33453,10 @@ __ai __attribute__((target("neon"))) int64x2_t vsubw_s32(int64x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int64x2_t vsubw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __noswap_vmovl_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -33449,10 +33470,10 @@ __ai __attribute__((target("neon"))) int32x4_t vsubw_s16(int32x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int32x4_t vsubw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __noswap_vmovl_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -33460,16 +33481,16 @@ __ai __attribute__((target("neon"))) int32x4_t vsubw_s16(int32x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbl1_p8(poly8x8_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbl1_p8(poly8x8_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__rev0, (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_64_8); return __ret; } #endif @@ -33477,16 +33498,16 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbl1_p8(poly8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbl1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbl1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33494,16 +33515,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbl1_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbl1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vtbl1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33511,18 +33532,18 @@ __ai __attribute__((target("neon"))) int8x8_t vtbl1_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbl2_p8(poly8x8x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbl2_p8(poly8x8x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x8x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], 
__p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33530,18 +33551,18 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbl2_p8(poly8x8x2_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbl2_u8(uint8x8x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbl2_u8(uint8x8x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33549,18 +33570,18 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbl2_u8(uint8x8x2_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbl2_s8(int8x8x2_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vtbl2_s8(int8x8x2_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - 
__ret = (int8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33568,19 +33589,19 @@ __ai __attribute__((target("neon"))) int8x8_t vtbl2_s8(int8x8x2_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbl3_p8(poly8x8x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbl3_p8(poly8x8x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x8x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33588,19 +33609,19 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbl3_p8(poly8x8x3_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbl3_u8(uint8x8x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbl3_u8(uint8x8x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8x3_t __rev0; - __rev0.val[0] = 
__builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33608,19 +33629,19 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbl3_u8(uint8x8x3_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbl3_s8(int8x8x3_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vtbl3_s8(int8x8x3_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33628,20 +33649,20 @@ __ai __attribute__((target("neon"))) int8x8_t vtbl3_s8(int8x8x3_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbl4_p8(poly8x8x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) 
__builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbl4_p8(poly8x8x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x8x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev0.val[3], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33649,20 +33670,20 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbl4_p8(poly8x8x4_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbl4_u8(uint8x8x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbl4_u8(uint8x8x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x8x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev0.val[3], (int8x8_t)__rev1, 16); - __ret 
= __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33670,20 +33691,20 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbl4_u8(uint8x8x4_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbl4_s8(int8x8x4_t __p0, int8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vtbl4_s8(int8x8x4_t __p0, int8x8_t __p1) { int8x8_t __ret; int8x8x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__rev0.val[0], (int8x8_t)__rev0.val[1], (int8x8_t)__rev0.val[2], (int8x8_t)__rev0.val[3], (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33691,17 +33712,17 @@ __ai __attribute__((target("neon"))) int8x8_t vtbl4_s8(int8x8x4_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbx1_p8(poly8x8_t __p0, poly8x8_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, 
(int8x8_t)__p1, (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbx1_p8(poly8x8_t __p0, poly8x8_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33709,17 +33730,17 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbx1_p8(poly8x8_t __p0, poly8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbx1_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbx1_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33727,17 +33748,17 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbx1_u8(uint8x8_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbx1_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai 
__attribute__((target("neon"))) int8x8_t vtbx1_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33745,19 +33766,19 @@ __ai __attribute__((target("neon"))) int8x8_t vtbx1_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbx2_p8(poly8x8_t __p0, poly8x8x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbx2_p8(poly8x8_t __p0, poly8x8x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); poly8x8x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33765,19 +33786,19 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbx2_p8(poly8x8_t __p0, poly8x8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbx2_u8(uint8x8_t __p0, uint8x8x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, 
__builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbx2_u8(uint8x8_t __p0, uint8x8x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); uint8x8x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33785,19 +33806,19 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbx2_u8(uint8x8_t __p0, uint8x8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbx2_s8(int8x8_t __p0, int8x8x2_t __p1, int8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vtbx2_s8(int8x8_t __p0, int8x8x2_t __p1, int8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); int8x8x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33805,20 +33826,20 @@ __ai __attribute__((target("neon"))) int8x8_t vtbx2_s8(int8x8_t __p0, int8x8x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbx3_p8(poly8x8_t __p0, poly8x8x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbx3_p8(poly8x8_t __p0, poly8x8x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); poly8x8x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33826,20 +33847,20 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbx3_p8(poly8x8_t __p0, poly8x8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbx3_u8(uint8x8_t __p0, uint8x8x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbx3_u8(uint8x8_t __p0, uint8x8x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); uint8x8x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - 
__rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33847,20 +33868,20 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbx3_u8(uint8x8_t __p0, uint8x8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbx3_s8(int8x8_t __p0, int8x8x3_t __p1, int8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vtbx3_s8(int8x8_t __p0, int8x8x3_t __p1, int8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); int8x8x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return 
__ret; } #endif @@ -33868,21 +33889,21 @@ __ai __attribute__((target("neon"))) int8x8_t vtbx3_s8(int8x8_t __p0, int8x8x3_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vtbx4_p8(poly8x8_t __p0, poly8x8x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vtbx4_p8(poly8x8_t __p0, poly8x8x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); poly8x8x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33890,21 +33911,21 @@ __ai __attribute__((target("neon"))) poly8x8_t vtbx4_p8(poly8x8_t __p0, poly8x8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtbx4_u8(uint8x8_t __p0, uint8x8x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtbx4_u8(uint8x8_t __p0, uint8x8x4_t __p1, uint8x8_t 
__p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); uint8x8x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33912,21 +33933,21 @@ __ai __attribute__((target("neon"))) uint8x8_t vtbx4_u8(uint8x8_t __p0, uint8x8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vtbx4_s8(int8x8_t __p0, int8x8x4_t __p1, int8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vtbx4_s8(int8x8_t __p0, int8x8x4_t __p1, int8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); int8x8x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__rev0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], (int8x8_t)__rev2, 0); - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -33934,18 +33955,18 @@ __ai __attribute__((target("neon"))) int8x8_t vtbx4_s8(int8x8_t __p0, int8x8x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8x2_t vtrn_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 4); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8x2_t vtrn_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 4); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 4); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -33953,18 +33974,18 @@ __ai __attribute__((target("neon"))) poly8x8x2_t vtrn_p8(poly8x8_t __p0, poly8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x4x2_t vtrn_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 5); return __ret; } #else __ai __attribute__((target("neon"))) poly16x4x2_t vtrn_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 5); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 5); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 
3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -33972,18 +33993,18 @@ __ai __attribute__((target("neon"))) poly16x4x2_t vtrn_p16(poly16x4_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16x2_t vtrnq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 36); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16x2_t vtrnq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 36); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 36); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -33991,18 +34012,18 @@ __ai __attribute__((target("neon"))) poly8x16x2_t vtrnq_p8(poly8x16_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x8x2_t vtrnq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 37); return __ret; } #else __ai __attribute__((target("neon"))) poly16x8x2_t vtrnq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 37); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 37); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34010,18 +34031,18 @@ __ai __attribute__((target("neon"))) poly16x8x2_t vtrnq_p16(poly16x8_t 
__p0, pol #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16x2_t vtrnq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16x2_t vtrnq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 48); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -34029,18 +34050,18 @@ __ai __attribute__((target("neon"))) uint8x16x2_t vtrnq_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4x2_t vtrnq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4x2_t vtrnq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 50); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -34048,18 +34069,18 @@ __ai __attribute__((target("neon"))) uint32x4x2_t vtrnq_u32(uint32x4_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8x2_t vtrnq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8x2_t vtrnq_u16(uint16x8_t __p0, uint16x8_t 
__p1) { uint16x8x2_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 49); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34067,18 +34088,18 @@ __ai __attribute__((target("neon"))) uint16x8x2_t vtrnq_u16(uint16x8_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16x2_t vtrnq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32); return __ret; } #else __ai __attribute__((target("neon"))) int8x16x2_t vtrnq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 32); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -34086,18 +34107,18 @@ __ai __attribute__((target("neon"))) int8x16x2_t vtrnq_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4x2_t vtrnq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41); return __ret; } #else __ai __attribute__((target("neon"))) float32x4x2_t vtrnq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 41); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -34105,18 +34126,18 @@ __ai __attribute__((target("neon"))) float32x4x2_t vtrnq_f32(float32x4_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4x2_t vtrnq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34); return __ret; } #else __ai __attribute__((target("neon"))) int32x4x2_t vtrnq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 34); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -34124,18 +34145,18 @@ __ai __attribute__((target("neon"))) int32x4x2_t vtrnq_s32(int32x4_t __p0, int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8x2_t vtrnq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33); return __ret; } #else __ai __attribute__((target("neon"))) int16x8x2_t vtrnq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 33); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34143,18 +34164,18 @@ __ai 
__attribute__((target("neon"))) int16x8x2_t vtrnq_s16(int16x8_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8x2_t vtrn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8x2_t vtrn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 16); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -34162,18 +34183,18 @@ __ai __attribute__((target("neon"))) uint8x8x2_t vtrn_u8(uint8x8_t __p0, uint8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2x2_t vtrn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2x2_t vtrn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 18); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -34181,18 +34202,18 @@ __ai __attribute__((target("neon"))) uint32x2x2_t vtrn_u32(uint32x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4x2_t vtrn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4x2_t vtrn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 
= __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 17); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -34200,18 +34221,18 @@ __ai __attribute__((target("neon"))) uint16x4x2_t vtrn_u16(uint16x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8x2_t vtrn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0); return __ret; } #else __ai __attribute__((target("neon"))) int8x8x2_t vtrn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -34219,18 +34240,18 @@ __ai __attribute__((target("neon"))) int8x8x2_t vtrn_s8(int8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2x2_t vtrn_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9); return __ret; } #else __ai __attribute__((target("neon"))) float32x2x2_t vtrn_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 9); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 
__lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -34238,18 +34259,18 @@ __ai __attribute__((target("neon"))) float32x2x2_t vtrn_f32(float32x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2x2_t vtrn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2); return __ret; } #else __ai __attribute__((target("neon"))) int32x2x2_t vtrn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 2); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -34257,18 +34278,18 @@ __ai __attribute__((target("neon"))) int32x2x2_t vtrn_s32(int32x2_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4x2_t vtrn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1); return __ret; } #else __ai __attribute__((target("neon"))) int16x4x2_t vtrn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 1); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -34276,18 +34297,18 @@ __ai __attribute__((target("neon"))) int16x4x2_t vtrn_s16(int16x4_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40); return __ret; } #else __ai __attribute__((target("neon"))) float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) { 
float16x8x2_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34295,18 +34316,18 @@ __ai __attribute__((target("neon"))) float16x8x2_t vtrnq_f16(float16x8_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8); return __ret; } #else __ai __attribute__((target("neon"))) float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vtrn_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -34314,16 +34335,16 @@ __ai __attribute__((target("neon"))) float16x4x2_t vtrn_f16(float16x4_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtst_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtst_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = 
__builtin_bit_cast(uint8x8_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -34331,16 +34352,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vtst_p8(poly8x8_t __p0, poly8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vtst_p16(poly16x4_t __p0, poly16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vtst_p16(poly16x4_t __p0, poly16x4_t __p1) { uint16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -34348,16 +34369,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vtst_p16(poly16x4_t __p0, poly16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vtstq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vtstq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -34365,16 +34386,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vtstq_p8(poly8x16_t __p0, poly8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vtstq_p16(poly16x8_t __p0, poly16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t 
vtstq_p16(poly16x8_t __p0, poly16x8_t __p1) { uint16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -34382,16 +34403,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vtstq_p16(poly16x8_t __p0, poly1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vtstq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vtstq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -34399,16 +34420,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vtstq_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vtstq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vtstq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -34416,16 +34437,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vtstq_u32(uint32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vtstq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vtstq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -34433,16 +34454,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vtstq_u16(uint16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vtstq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vtstq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -34450,16 +34471,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vtstq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vtstq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vtstq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -34467,16 +34488,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vtstq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vtstq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vtstq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -34484,16 +34505,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vtstq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtst_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtst_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -34501,16 +34522,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vtst_u8(uint8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) uint32x2_t vtst_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vtst_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -34518,16 +34539,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vtst_u32(uint32x2_t __p0, uint32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vtst_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vtst_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -34535,16 +34556,16 @@ __ai __attribute__((target("neon"))) uint16x4_t vtst_u16(uint16x4_t __p0, uint16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vtst_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vtst_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
__p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -34552,16 +34573,16 @@ __ai __attribute__((target("neon"))) uint8x8_t vtst_s8(int8x8_t __p0, int8x8_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vtst_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vtst_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -34569,16 +34590,16 @@ __ai __attribute__((target("neon"))) uint32x2_t vtst_s32(int32x2_t __p0, int32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vtst_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vtst_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -34586,18 +34607,18 @@ __ai __attribute__((target("neon"))) uint16x4_t vtst_s16(int16x4_t __p0, int16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8x2_t vuzp_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 4); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8x2_t vuzp_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 4); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 4); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -34605,18 +34626,18 @@ __ai __attribute__((target("neon"))) poly8x8x2_t vuzp_p8(poly8x8_t __p0, poly8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x4x2_t vuzp_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 5); return __ret; } #else __ai __attribute__((target("neon"))) poly16x4x2_t vuzp_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 5); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 5); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -34624,18 +34645,18 @@ __ai __attribute__((target("neon"))) poly16x4x2_t vuzp_p16(poly16x4_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16x2_t vuzpq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 36); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16x2_t vuzpq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 36); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 36); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 
3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -34643,18 +34664,18 @@ __ai __attribute__((target("neon"))) poly8x16x2_t vuzpq_p8(poly8x16_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x8x2_t vuzpq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 37); return __ret; } #else __ai __attribute__((target("neon"))) poly16x8x2_t vuzpq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 37); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 37); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34662,18 +34683,18 @@ __ai __attribute__((target("neon"))) poly16x8x2_t vuzpq_p16(poly16x8_t __p0, pol #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16x2_t vuzpq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16x2_t vuzpq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 48); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -34681,18 +34702,18 @@ __ai 
__attribute__((target("neon"))) uint8x16x2_t vuzpq_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4x2_t vuzpq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4x2_t vuzpq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 50); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -34700,18 +34721,18 @@ __ai __attribute__((target("neon"))) uint32x4x2_t vuzpq_u32(uint32x4_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8x2_t vuzpq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8x2_t vuzpq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 49); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34719,18 +34740,18 @@ __ai __attribute__((target("neon"))) uint16x8x2_t vuzpq_u16(uint16x8_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16x2_t vuzpq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32); return __ret; } #else __ai __attribute__((target("neon"))) int8x16x2_t vuzpq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; - int8x16_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 32); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -34738,18 +34759,18 @@ __ai __attribute__((target("neon"))) int8x16x2_t vuzpq_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4x2_t vuzpq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41); return __ret; } #else __ai __attribute__((target("neon"))) float32x4x2_t vuzpq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 41); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -34757,18 +34778,18 @@ __ai __attribute__((target("neon"))) float32x4x2_t vuzpq_f32(float32x4_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4x2_t vuzpq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34); return __ret; } #else __ai __attribute__((target("neon"))) int32x4x2_t vuzpq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 34); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, 
__rev0), __builtin_bit_cast(int8x16_t, __rev1), 34); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -34776,18 +34797,18 @@ __ai __attribute__((target("neon"))) int32x4x2_t vuzpq_s32(int32x4_t __p0, int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8x2_t vuzpq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33); return __ret; } #else __ai __attribute__((target("neon"))) int16x8x2_t vuzpq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 33); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34795,18 +34816,18 @@ __ai __attribute__((target("neon"))) int16x8x2_t vuzpq_s16(int16x8_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8x2_t vuzp_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8x2_t vuzp_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 16); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -34814,18 +34835,18 @@ __ai __attribute__((target("neon"))) uint8x8x2_t vuzp_u8(uint8x8_t __p0, uint8x8 #ifdef __LITTLE_ENDIAN__ 
__ai __attribute__((target("neon"))) uint32x2x2_t vuzp_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2x2_t vuzp_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 18); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -34833,18 +34854,18 @@ __ai __attribute__((target("neon"))) uint32x2x2_t vuzp_u32(uint32x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4x2_t vuzp_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4x2_t vuzp_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 17); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -34852,18 +34873,18 @@ __ai __attribute__((target("neon"))) uint16x4x2_t vuzp_u16(uint16x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8x2_t vuzp_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0); return __ret; } #else __ai __attribute__((target("neon"))) int8x8x2_t vuzp_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 0); + 
int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -34871,18 +34892,18 @@ __ai __attribute__((target("neon"))) int8x8x2_t vuzp_s8(int8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2x2_t vuzp_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9); return __ret; } #else __ai __attribute__((target("neon"))) float32x2x2_t vuzp_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 9); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -34890,18 +34911,18 @@ __ai __attribute__((target("neon"))) float32x2x2_t vuzp_f32(float32x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2x2_t vuzp_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2); return __ret; } #else __ai __attribute__((target("neon"))) int32x2x2_t vuzp_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 2); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -34909,18 +34930,18 @@ 
__ai __attribute__((target("neon"))) int32x2x2_t vuzp_s32(int32x2_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4x2_t vuzp_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1); return __ret; } #else __ai __attribute__((target("neon"))) int16x4x2_t vuzp_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 1); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -34928,18 +34949,18 @@ __ai __attribute__((target("neon"))) int16x4x2_t vuzp_s16(int16x4_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40); return __ret; } #else __ai __attribute__((target("neon"))) float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -34947,18 +34968,18 @@ __ai __attribute__((target("neon"))) float16x8x2_t vuzpq_f16(float16x8_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8); return __ret; } #else __ai __attribute__((target("neon"))) float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; - float16x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vuzp_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -34966,18 +34987,18 @@ __ai __attribute__((target("neon"))) float16x4x2_t vuzp_f16(float16x4_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8x2_t vzip_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 4); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8x2_t vzip_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 4); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 4); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -34985,18 +35006,18 @@ __ai __attribute__((target("neon"))) poly8x8x2_t vzip_p8(poly8x8_t __p0, poly8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x4x2_t vzip_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 5); return __ret; } #else __ai __attribute__((target("neon"))) poly16x4x2_t vzip_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 5); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 5); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = 
__builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -35004,18 +35025,18 @@ __ai __attribute__((target("neon"))) poly16x4x2_t vzip_p16(poly16x4_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16x2_t vzipq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 36); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16x2_t vzipq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 36); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 36); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -35023,18 +35044,18 @@ __ai __attribute__((target("neon"))) poly8x16x2_t vzipq_p8(poly8x16_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly16x8x2_t vzipq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 37); return __ret; } #else __ai __attribute__((target("neon"))) poly16x8x2_t vzipq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 37); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 37); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -35042,18 +35063,18 @@ __ai __attribute__((target("neon"))) poly16x8x2_t vzipq_p16(poly16x8_t __p0, pol #ifdef 
__LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16x2_t vzipq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16x2_t vzipq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 48); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -35061,18 +35082,18 @@ __ai __attribute__((target("neon"))) uint8x16x2_t vzipq_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4x2_t vzipq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4x2_t vzipq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 50); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -35080,18 +35101,18 @@ __ai __attribute__((target("neon"))) uint32x4x2_t vzipq_u32(uint32x4_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8x2_t vzipq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8x2_t vzipq_u16(uint16x8_t __p0, uint16x8_t __p1) { 
uint16x8x2_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 49); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -35099,18 +35120,18 @@ __ai __attribute__((target("neon"))) uint16x8x2_t vzipq_u16(uint16x8_t __p0, uin #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16x2_t vzipq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32); return __ret; } #else __ai __attribute__((target("neon"))) int8x16x2_t vzipq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 32); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); return __ret; } #endif @@ -35118,18 +35139,18 @@ __ai __attribute__((target("neon"))) int8x16x2_t vzipq_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4x2_t vzipq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41); return __ret; } #else __ai __attribute__((target("neon"))) float32x4x2_t vzipq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 41); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -35137,18 +35158,18 @@ __ai __attribute__((target("neon"))) float32x4x2_t vzipq_f32(float32x4_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4x2_t vzipq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34); return __ret; } #else __ai __attribute__((target("neon"))) int32x4x2_t vzipq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 34); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_32); return __ret; } #endif @@ -35156,18 +35177,18 @@ __ai __attribute__((target("neon"))) int32x4x2_t vzipq_s32(int32x4_t __p0, int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8x2_t vzipq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33); return __ret; } #else __ai __attribute__((target("neon"))) int16x8x2_t vzipq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 33); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -35175,18 +35196,18 @@ __ai 
__attribute__((target("neon"))) int16x8x2_t vzipq_s16(int16x8_t __p0, int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8x2_t vzip_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8x2_t vzip_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 16); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -35194,18 +35215,18 @@ __ai __attribute__((target("neon"))) uint8x8x2_t vzip_u8(uint8x8_t __p0, uint8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2x2_t vzip_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2x2_t vzip_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 18); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -35213,18 +35234,18 @@ __ai __attribute__((target("neon"))) uint32x2x2_t vzip_u32(uint32x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4x2_t vzip_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4x2_t vzip_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 
= __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 17); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -35232,18 +35253,18 @@ __ai __attribute__((target("neon"))) uint16x4x2_t vzip_u16(uint16x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8x2_t vzip_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0); return __ret; } #else __ai __attribute__((target("neon"))) int8x8x2_t vzip_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); return __ret; } #endif @@ -35251,18 +35272,18 @@ __ai __attribute__((target("neon"))) int8x8x2_t vzip_s8(int8x8_t __p0, int8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2x2_t vzip_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9); return __ret; } #else __ai __attribute__((target("neon"))) float32x2x2_t vzip_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 9); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 
__lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -35270,18 +35291,18 @@ __ai __attribute__((target("neon"))) float32x2x2_t vzip_f32(float32x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2x2_t vzip_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2); return __ret; } #else __ai __attribute__((target("neon"))) int32x2x2_t vzip_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 2); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_32); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_32); return __ret; } #endif @@ -35289,18 +35310,18 @@ __ai __attribute__((target("neon"))) int32x2x2_t vzip_s32(int32x2_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4x2_t vzip_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1); return __ret; } #else __ai __attribute__((target("neon"))) int16x4x2_t vzip_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 1); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -35308,18 +35329,18 @@ __ai __attribute__((target("neon"))) int16x4x2_t vzip_s16(int16x4_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40); return __ret; } #else __ai __attribute__((target("neon"))) float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) { 
float16x8x2_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __builtin_neon_vzipq_v(&__ret, (int8x16_t)__rev0, (int8x16_t)__rev1, 40); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); return __ret; } #endif @@ -35327,18 +35348,18 @@ __ai __attribute__((target("neon"))) float16x8x2_t vzipq_f16(float16x8_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; - __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8); return __ret; } #else __ai __attribute__((target("neon"))) float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __builtin_neon_vzip_v(&__ret, (int8x8_t)__rev0, (int8x8_t)__rev1, 8); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8); - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); return __ret; } #endif @@ -35346,22 +35367,22 @@ __ai __attribute__((target("neon"))) float16x4x2_t vzip_f16(float16x4_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmlahq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmlahq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqrdmlahq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 
1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmlahq_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int32x4_t __noswap_vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmlahq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmlahq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #endif @@ -35369,22 +35390,22 @@ __ai __attribute__((target("v8.1a,neon"))) int32x4_t __noswap_vqrdmlahq_s32(int3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrdmlahq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmlahq_s16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 33)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int16x8_t vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqrdmlahq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmlahq_s16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int16x8_t __noswap_vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrdmlahq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmlahq_s16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 33)); return __ret; } #endif @@ -35392,22 +35413,22 @@ __ai __attribute__((target("v8.1a,neon"))) int16x8_t __noswap_vqrdmlahq_s16(int1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmlah_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 
2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmlah_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqrdmlah_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmlah_s32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int32x2_t __noswap_vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmlah_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmlah_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #endif @@ -35415,22 +35436,22 @@ __ai __attribute__((target("v8.1a,neon"))) int32x2_t __noswap_vqrdmlah_s32(int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrdmlah_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmlah_s16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 1)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqrdmlah_s16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmlah_s16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - __ret = (int16x4_t) 
__builtin_neon_vqrdmlah_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmlah_s16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 1)); return __ret; } #endif @@ -35450,11 +35471,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlah_s16(int16 int32x4_t __s0_135 = __p0_135; \ int32x4_t __s1_135 = __p1_135; \ int32x2_t __s2_135 = __p2_135; \ - int32x4_t __rev0_135; __rev0_135 = __builtin_shufflevector(__s0_135, __s0_135, 3, 2, 1, 0); \ - int32x4_t __rev1_135; __rev1_135 = __builtin_shufflevector(__s1_135, __s1_135, 3, 2, 1, 0); \ - int32x2_t __rev2_135; __rev2_135 = __builtin_shufflevector(__s2_135, __s2_135, 1, 0); \ + int32x4_t __rev0_135; __rev0_135 = __builtin_shufflevector(__s0_135, __s0_135, __lane_reverse_128_32); \ + int32x4_t __rev1_135; __rev1_135 = __builtin_shufflevector(__s1_135, __s1_135, __lane_reverse_128_32); \ + int32x2_t __rev2_135; __rev2_135 = __builtin_shufflevector(__s2_135, __s2_135, __lane_reverse_64_32); \ __ret_135 = __noswap_vqrdmlahq_s32(__rev0_135, __rev1_135, __noswap_splatq_lane_s32(__rev2_135, __p3_135)); \ - __ret_135 = __builtin_shufflevector(__ret_135, __ret_135, 3, 2, 1, 0); \ + __ret_135 = __builtin_shufflevector(__ret_135, __ret_135, __lane_reverse_128_32); \ __ret_135; \ }) #endif @@ -35474,11 +35495,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlah_s16(int16 int16x8_t __s0_137 = __p0_137; \ int16x8_t __s1_137 = __p1_137; \ int16x4_t __s2_137 = __p2_137; \ - int16x8_t __rev0_137; __rev0_137 = __builtin_shufflevector(__s0_137, __s0_137, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_137; __rev1_137 = __builtin_shufflevector(__s1_137, __s1_137, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_137; __rev2_137 = __builtin_shufflevector(__s2_137, __s2_137, 3, 2, 1, 0); \ + int16x8_t __rev0_137; __rev0_137 = __builtin_shufflevector(__s0_137, __s0_137, __lane_reverse_128_16); \ + int16x8_t __rev1_137; __rev1_137 = __builtin_shufflevector(__s1_137, __s1_137, __lane_reverse_128_16); \ + int16x4_t __rev2_137; __rev2_137 = __builtin_shufflevector(__s2_137, __s2_137, __lane_reverse_64_16); \ __ret_137 = __noswap_vqrdmlahq_s16(__rev0_137, __rev1_137, __noswap_splatq_lane_s16(__rev2_137, __p3_137)); \ - __ret_137 = __builtin_shufflevector(__ret_137, __ret_137, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_137 = __builtin_shufflevector(__ret_137, __ret_137, __lane_reverse_128_16); \ __ret_137; \ }) #endif @@ -35498,11 +35519,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlah_s16(int16 int32x2_t __s0_139 = __p0_139; \ int32x2_t __s1_139 = __p1_139; \ int32x2_t __s2_139 = __p2_139; \ - int32x2_t __rev0_139; __rev0_139 = __builtin_shufflevector(__s0_139, __s0_139, 1, 0); \ - int32x2_t __rev1_139; __rev1_139 = __builtin_shufflevector(__s1_139, __s1_139, 1, 0); \ - int32x2_t __rev2_139; __rev2_139 = __builtin_shufflevector(__s2_139, __s2_139, 1, 0); \ + int32x2_t __rev0_139; __rev0_139 = __builtin_shufflevector(__s0_139, __s0_139, __lane_reverse_64_32); \ + int32x2_t __rev1_139; __rev1_139 = __builtin_shufflevector(__s1_139, __s1_139, __lane_reverse_64_32); \ + int32x2_t __rev2_139; __rev2_139 = __builtin_shufflevector(__s2_139, __s2_139, __lane_reverse_64_32); \ __ret_139 = __noswap_vqrdmlah_s32(__rev0_139, __rev1_139, __noswap_splat_lane_s32(__rev2_139, __p3_139)); \ - __ret_139 = __builtin_shufflevector(__ret_139, __ret_139, 1, 0); \ + __ret_139 = __builtin_shufflevector(__ret_139, 
__ret_139, __lane_reverse_64_32); \ __ret_139; \ }) #endif @@ -35522,11 +35543,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlah_s16(int16 int16x4_t __s0_141 = __p0_141; \ int16x4_t __s1_141 = __p1_141; \ int16x4_t __s2_141 = __p2_141; \ - int16x4_t __rev0_141; __rev0_141 = __builtin_shufflevector(__s0_141, __s0_141, 3, 2, 1, 0); \ - int16x4_t __rev1_141; __rev1_141 = __builtin_shufflevector(__s1_141, __s1_141, 3, 2, 1, 0); \ - int16x4_t __rev2_141; __rev2_141 = __builtin_shufflevector(__s2_141, __s2_141, 3, 2, 1, 0); \ + int16x4_t __rev0_141; __rev0_141 = __builtin_shufflevector(__s0_141, __s0_141, __lane_reverse_64_16); \ + int16x4_t __rev1_141; __rev1_141 = __builtin_shufflevector(__s1_141, __s1_141, __lane_reverse_64_16); \ + int16x4_t __rev2_141; __rev2_141 = __builtin_shufflevector(__s2_141, __s2_141, __lane_reverse_64_16); \ __ret_141 = __noswap_vqrdmlah_s16(__rev0_141, __rev1_141, __noswap_splat_lane_s16(__rev2_141, __p3_141)); \ - __ret_141 = __builtin_shufflevector(__ret_141, __ret_141, 3, 2, 1, 0); \ + __ret_141 = __builtin_shufflevector(__ret_141, __ret_141, __lane_reverse_64_16); \ __ret_141; \ }) #endif @@ -35534,22 +35555,22 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlah_s16(int16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmlshq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmlshq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vqrdmlshq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmlshq_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int32x4_t __noswap_vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vqrdmlshq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmlshq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #endif @@ -35557,22 +35578,22 @@ __ai __attribute__((target("v8.1a,neon"))) int32x4_t __noswap_vqrdmlshq_s32(int3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t 
__ret; - __ret = (int16x8_t) __builtin_neon_vqrdmlshq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmlshq_s16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 33)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vqrdmlshq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmlshq_s16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int16x8_t __noswap_vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vqrdmlshq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmlshq_s16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 33)); return __ret; } #endif @@ -35580,22 +35601,22 @@ __ai __attribute__((target("v8.1a,neon"))) int16x8_t __noswap_vqrdmlshq_s16(int1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmlsh_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmlsh_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int32x2_t) __builtin_neon_vqrdmlsh_s32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmlsh_s32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai 
__attribute__((target("v8.1a,neon"))) int32x2_t __noswap_vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vqrdmlsh_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmlsh_s32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 2)); return __ret; } #endif @@ -35603,22 +35624,22 @@ __ai __attribute__((target("v8.1a,neon"))) int32x2_t __noswap_vqrdmlsh_s32(int32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.1a,neon"))) int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrdmlsh_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmlsh_s16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 1)); return __ret; } #else __ai __attribute__((target("v8.1a,neon"))) int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vqrdmlsh_s16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmlsh_s16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vqrdmlsh_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmlsh_s16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 1)); return __ret; } #endif @@ -35638,11 +35659,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlsh_s16(int16 int32x4_t __s0_143 = __p0_143; \ int32x4_t __s1_143 = __p1_143; \ int32x2_t __s2_143 = __p2_143; \ - int32x4_t __rev0_143; __rev0_143 = __builtin_shufflevector(__s0_143, __s0_143, 3, 2, 1, 0); \ - int32x4_t __rev1_143; __rev1_143 = __builtin_shufflevector(__s1_143, __s1_143, 3, 2, 1, 0); \ - int32x2_t __rev2_143; __rev2_143 = __builtin_shufflevector(__s2_143, __s2_143, 1, 0); \ + int32x4_t __rev0_143; __rev0_143 = __builtin_shufflevector(__s0_143, __s0_143, __lane_reverse_128_32); \ + int32x4_t __rev1_143; __rev1_143 = __builtin_shufflevector(__s1_143, __s1_143, __lane_reverse_128_32); \ + int32x2_t __rev2_143; __rev2_143 = __builtin_shufflevector(__s2_143, __s2_143, __lane_reverse_64_32); \ __ret_143 = __noswap_vqrdmlshq_s32(__rev0_143, __rev1_143, __noswap_splatq_lane_s32(__rev2_143, __p3_143)); \ - __ret_143 = __builtin_shufflevector(__ret_143, __ret_143, 3, 2, 1, 0); \ + __ret_143 = 
__builtin_shufflevector(__ret_143, __ret_143, __lane_reverse_128_32); \ __ret_143; \ }) #endif @@ -35662,11 +35683,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlsh_s16(int16 int16x8_t __s0_145 = __p0_145; \ int16x8_t __s1_145 = __p1_145; \ int16x4_t __s2_145 = __p2_145; \ - int16x8_t __rev0_145; __rev0_145 = __builtin_shufflevector(__s0_145, __s0_145, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_145; __rev1_145 = __builtin_shufflevector(__s1_145, __s1_145, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_145; __rev2_145 = __builtin_shufflevector(__s2_145, __s2_145, 3, 2, 1, 0); \ + int16x8_t __rev0_145; __rev0_145 = __builtin_shufflevector(__s0_145, __s0_145, __lane_reverse_128_16); \ + int16x8_t __rev1_145; __rev1_145 = __builtin_shufflevector(__s1_145, __s1_145, __lane_reverse_128_16); \ + int16x4_t __rev2_145; __rev2_145 = __builtin_shufflevector(__s2_145, __s2_145, __lane_reverse_64_16); \ __ret_145 = __noswap_vqrdmlshq_s16(__rev0_145, __rev1_145, __noswap_splatq_lane_s16(__rev2_145, __p3_145)); \ - __ret_145 = __builtin_shufflevector(__ret_145, __ret_145, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_145 = __builtin_shufflevector(__ret_145, __ret_145, __lane_reverse_128_16); \ __ret_145; \ }) #endif @@ -35686,11 +35707,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlsh_s16(int16 int32x2_t __s0_147 = __p0_147; \ int32x2_t __s1_147 = __p1_147; \ int32x2_t __s2_147 = __p2_147; \ - int32x2_t __rev0_147; __rev0_147 = __builtin_shufflevector(__s0_147, __s0_147, 1, 0); \ - int32x2_t __rev1_147; __rev1_147 = __builtin_shufflevector(__s1_147, __s1_147, 1, 0); \ - int32x2_t __rev2_147; __rev2_147 = __builtin_shufflevector(__s2_147, __s2_147, 1, 0); \ + int32x2_t __rev0_147; __rev0_147 = __builtin_shufflevector(__s0_147, __s0_147, __lane_reverse_64_32); \ + int32x2_t __rev1_147; __rev1_147 = __builtin_shufflevector(__s1_147, __s1_147, __lane_reverse_64_32); \ + int32x2_t __rev2_147; __rev2_147 = __builtin_shufflevector(__s2_147, __s2_147, __lane_reverse_64_32); \ __ret_147 = __noswap_vqrdmlsh_s32(__rev0_147, __rev1_147, __noswap_splat_lane_s32(__rev2_147, __p3_147)); \ - __ret_147 = __builtin_shufflevector(__ret_147, __ret_147, 1, 0); \ + __ret_147 = __builtin_shufflevector(__ret_147, __ret_147, __lane_reverse_64_32); \ __ret_147; \ }) #endif @@ -35710,11 +35731,11 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlsh_s16(int16 int16x4_t __s0_149 = __p0_149; \ int16x4_t __s1_149 = __p1_149; \ int16x4_t __s2_149 = __p2_149; \ - int16x4_t __rev0_149; __rev0_149 = __builtin_shufflevector(__s0_149, __s0_149, 3, 2, 1, 0); \ - int16x4_t __rev1_149; __rev1_149 = __builtin_shufflevector(__s1_149, __s1_149, 3, 2, 1, 0); \ - int16x4_t __rev2_149; __rev2_149 = __builtin_shufflevector(__s2_149, __s2_149, 3, 2, 1, 0); \ + int16x4_t __rev0_149; __rev0_149 = __builtin_shufflevector(__s0_149, __s0_149, __lane_reverse_64_16); \ + int16x4_t __rev1_149; __rev1_149 = __builtin_shufflevector(__s1_149, __s1_149, __lane_reverse_64_16); \ + int16x4_t __rev2_149; __rev2_149 = __builtin_shufflevector(__s2_149, __s2_149, __lane_reverse_64_16); \ __ret_149 = __noswap_vqrdmlsh_s16(__rev0_149, __rev1_149, __noswap_splat_lane_s16(__rev2_149, __p3_149)); \ - __ret_149 = __builtin_shufflevector(__ret_149, __ret_149, 3, 2, 1, 0); \ + __ret_149 = __builtin_shufflevector(__ret_149, __ret_149, __lane_reverse_64_16); \ __ret_149; \ }) #endif @@ -35722,16 +35743,16 @@ __ai __attribute__((target("v8.1a,neon"))) int16x4_t __noswap_vqrdmlsh_s16(int16 #ifdef __LITTLE_ENDIAN__ 
__ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcadd_rot270_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcadd_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcadd_rot270_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcadd_rot270_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcadd_rot270_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcadd_rot270_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -35739,16 +35760,16 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcadd_rot270_f16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcadd_rot90_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcadd_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcadd_rot90_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcadd_rot90_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcadd_rot90_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcadd_rot90_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -35756,16 +35777,16 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcadd_rot90_f16( #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcaddq_rot270_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcaddq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcaddq_rot270_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcaddq_rot270_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 
3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcaddq_rot270_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcaddq_rot270_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -35773,16 +35794,16 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcaddq_rot270_f1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcaddq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcaddq_rot90_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcaddq_rot90_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcaddq_rot90_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -35790,22 +35811,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcaddq_rot90_f16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcmlaq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 
__lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #endif @@ -35813,22 +35834,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcmla_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #endif @@ -35848,11 +35869,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_f float16x4_t __s0_151 = __p0_151; \ float16x4_t __s1_151 = __p1_151; \ float16x4_t __s2_151 = __p2_151; \ - float16x4_t __rev0_151; __rev0_151 = __builtin_shufflevector(__s0_151, __s0_151, 3, 2, 1, 0); \ - float16x4_t __rev1_151; __rev1_151 = __builtin_shufflevector(__s1_151, __s1_151, 3, 2, 1, 0); \ - float16x4_t __rev2_151; __rev2_151 = __builtin_shufflevector(__s2_151, __s2_151, 3, 2, 1, 0); \ + float16x4_t __rev0_151; __rev0_151 = __builtin_shufflevector(__s0_151, __s0_151, __lane_reverse_64_16); \ + float16x4_t 
__rev1_151; __rev1_151 = __builtin_shufflevector(__s1_151, __s1_151, __lane_reverse_64_16); \ + float16x4_t __rev2_151; __rev2_151 = __builtin_shufflevector(__s2_151, __s2_151, __lane_reverse_64_16); \ __ret_151 = __noswap_vcmla_f16(__rev0_151, __rev1_151, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_151), __p3_151), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_151), __p3_151)})); \ - __ret_151 = __builtin_shufflevector(__ret_151, __ret_151, 3, 2, 1, 0); \ + __ret_151 = __builtin_shufflevector(__ret_151, __ret_151, __lane_reverse_64_16); \ __ret_151; \ }) #endif @@ -35872,11 +35893,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_f float16x8_t __s0_153 = __p0_153; \ float16x8_t __s1_153 = __p1_153; \ float16x4_t __s2_153 = __p2_153; \ - float16x8_t __rev0_153; __rev0_153 = __builtin_shufflevector(__s0_153, __s0_153, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_153; __rev1_153 = __builtin_shufflevector(__s1_153, __s1_153, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_153; __rev2_153 = __builtin_shufflevector(__s2_153, __s2_153, 3, 2, 1, 0); \ + float16x8_t __rev0_153; __rev0_153 = __builtin_shufflevector(__s0_153, __s0_153, __lane_reverse_128_16); \ + float16x8_t __rev1_153; __rev1_153 = __builtin_shufflevector(__s1_153, __s1_153, __lane_reverse_128_16); \ + float16x4_t __rev2_153; __rev2_153 = __builtin_shufflevector(__s2_153, __s2_153, __lane_reverse_64_16); \ __ret_153 = __noswap_vcmlaq_f16(__rev0_153, __rev1_153, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153)})); \ - __ret_153 = __builtin_shufflevector(__ret_153, __ret_153, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_153 = __builtin_shufflevector(__ret_153, __ret_153, __lane_reverse_128_16); \ __ret_153; \ }) #endif @@ -35896,11 +35917,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_f float16x4_t __s0_155 = __p0_155; \ float16x4_t __s1_155 = __p1_155; \ float16x8_t __s2_155 = __p2_155; \ - float16x4_t __rev0_155; __rev0_155 = __builtin_shufflevector(__s0_155, __s0_155, 3, 2, 1, 0); \ - float16x4_t __rev1_155; __rev1_155 = __builtin_shufflevector(__s1_155, __s1_155, 3, 2, 1, 0); \ - float16x8_t __rev2_155; __rev2_155 = __builtin_shufflevector(__s2_155, __s2_155, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev0_155; __rev0_155 = __builtin_shufflevector(__s0_155, __s0_155, __lane_reverse_64_16); \ + float16x4_t __rev1_155; __rev1_155 = __builtin_shufflevector(__s1_155, __s1_155, __lane_reverse_64_16); \ + float16x8_t __rev2_155; __rev2_155 = __builtin_shufflevector(__s2_155, __s2_155, __lane_reverse_128_16); \ __ret_155 = __noswap_vcmla_f16(__rev0_155, __rev1_155, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_155), __p3_155), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_155), __p3_155)})); \ - __ret_155 = __builtin_shufflevector(__ret_155, __ret_155, 3, 2, 1, 0); \ + __ret_155 = __builtin_shufflevector(__ret_155, __ret_155, __lane_reverse_64_16); \ __ret_155; \ }) #endif @@ -35920,11 +35941,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_f float16x8_t __s0_157 = __p0_157; \ 
float16x8_t __s1_157 = __p1_157; \ float16x8_t __s2_157 = __p2_157; \ - float16x8_t __rev0_157; __rev0_157 = __builtin_shufflevector(__s0_157, __s0_157, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_157; __rev1_157 = __builtin_shufflevector(__s1_157, __s1_157, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_157; __rev2_157 = __builtin_shufflevector(__s2_157, __s2_157, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0_157; __rev0_157 = __builtin_shufflevector(__s0_157, __s0_157, __lane_reverse_128_16); \ + float16x8_t __rev1_157; __rev1_157 = __builtin_shufflevector(__s1_157, __s1_157, __lane_reverse_128_16); \ + float16x8_t __rev2_157; __rev2_157 = __builtin_shufflevector(__s2_157, __s2_157, __lane_reverse_128_16); \ __ret_157 = __noswap_vcmlaq_f16(__rev0_157, __rev1_157, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157)})); \ - __ret_157 = __builtin_shufflevector(__ret_157, __ret_157, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_157 = __builtin_shufflevector(__ret_157, __ret_157, __lane_reverse_128_16); \ __ret_157; \ }) #endif @@ -35932,22 +35953,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot180_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot180_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot180_f16(__builtin_bit_cast(int8x16_t, __p0), 
__builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #endif @@ -35955,22 +35976,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_rot180_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot180_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcmla_rot180_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot180_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_rot180_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot180_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #endif @@ -35990,11 +36011,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_159 = __p0_159; \ float16x4_t __s1_159 = __p1_159; \ float16x4_t __s2_159 = __p2_159; \ - float16x4_t __rev0_159; __rev0_159 = __builtin_shufflevector(__s0_159, __s0_159, 3, 2, 1, 0); \ - float16x4_t __rev1_159; __rev1_159 = __builtin_shufflevector(__s1_159, __s1_159, 3, 2, 1, 0); \ - float16x4_t __rev2_159; __rev2_159 = __builtin_shufflevector(__s2_159, __s2_159, 3, 2, 1, 0); \ + float16x4_t __rev0_159; __rev0_159 = __builtin_shufflevector(__s0_159, __s0_159, __lane_reverse_64_16); \ + float16x4_t __rev1_159; __rev1_159 = __builtin_shufflevector(__s1_159, __s1_159, __lane_reverse_64_16); \ + float16x4_t __rev2_159; __rev2_159 = __builtin_shufflevector(__s2_159, __s2_159, __lane_reverse_64_16); \ __ret_159 = __noswap_vcmla_rot180_f16(__rev0_159, __rev1_159, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_159), __p3_159), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_159), __p3_159)})); \ - __ret_159 = __builtin_shufflevector(__ret_159, __ret_159, 3, 2, 1, 0); \ + __ret_159 = __builtin_shufflevector(__ret_159, __ret_159, 
__lane_reverse_64_16); \ __ret_159; \ }) #endif @@ -36014,11 +36035,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x8_t __s0_161 = __p0_161; \ float16x8_t __s1_161 = __p1_161; \ float16x4_t __s2_161 = __p2_161; \ - float16x8_t __rev0_161; __rev0_161 = __builtin_shufflevector(__s0_161, __s0_161, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_161; __rev1_161 = __builtin_shufflevector(__s1_161, __s1_161, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_161; __rev2_161 = __builtin_shufflevector(__s2_161, __s2_161, 3, 2, 1, 0); \ + float16x8_t __rev0_161; __rev0_161 = __builtin_shufflevector(__s0_161, __s0_161, __lane_reverse_128_16); \ + float16x8_t __rev1_161; __rev1_161 = __builtin_shufflevector(__s1_161, __s1_161, __lane_reverse_128_16); \ + float16x4_t __rev2_161; __rev2_161 = __builtin_shufflevector(__s2_161, __s2_161, __lane_reverse_64_16); \ __ret_161 = __noswap_vcmlaq_rot180_f16(__rev0_161, __rev1_161, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161)})); \ - __ret_161 = __builtin_shufflevector(__ret_161, __ret_161, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_161 = __builtin_shufflevector(__ret_161, __ret_161, __lane_reverse_128_16); \ __ret_161; \ }) #endif @@ -36038,11 +36059,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_163 = __p0_163; \ float16x4_t __s1_163 = __p1_163; \ float16x8_t __s2_163 = __p2_163; \ - float16x4_t __rev0_163; __rev0_163 = __builtin_shufflevector(__s0_163, __s0_163, 3, 2, 1, 0); \ - float16x4_t __rev1_163; __rev1_163 = __builtin_shufflevector(__s1_163, __s1_163, 3, 2, 1, 0); \ - float16x8_t __rev2_163; __rev2_163 = __builtin_shufflevector(__s2_163, __s2_163, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev0_163; __rev0_163 = __builtin_shufflevector(__s0_163, __s0_163, __lane_reverse_64_16); \ + float16x4_t __rev1_163; __rev1_163 = __builtin_shufflevector(__s1_163, __s1_163, __lane_reverse_64_16); \ + float16x8_t __rev2_163; __rev2_163 = __builtin_shufflevector(__s2_163, __s2_163, __lane_reverse_128_16); \ __ret_163 = __noswap_vcmla_rot180_f16(__rev0_163, __rev1_163, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_163), __p3_163), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_163), __p3_163)})); \ - __ret_163 = __builtin_shufflevector(__ret_163, __ret_163, 3, 2, 1, 0); \ + __ret_163 = __builtin_shufflevector(__ret_163, __ret_163, __lane_reverse_64_16); \ __ret_163; \ }) #endif @@ -36062,11 +36083,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x8_t __s0_165 = __p0_165; \ float16x8_t __s1_165 = __p1_165; \ float16x8_t __s2_165 = __p2_165; \ - float16x8_t __rev0_165; __rev0_165 = __builtin_shufflevector(__s0_165, __s0_165, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_165; __rev1_165 = __builtin_shufflevector(__s1_165, __s1_165, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_165; __rev2_165 = __builtin_shufflevector(__s2_165, __s2_165, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0_165; __rev0_165 = __builtin_shufflevector(__s0_165, __s0_165, __lane_reverse_128_16); \ + float16x8_t __rev1_165; __rev1_165 = __builtin_shufflevector(__s1_165, 
__s1_165, __lane_reverse_128_16); \ + float16x8_t __rev2_165; __rev2_165 = __builtin_shufflevector(__s2_165, __s2_165, __lane_reverse_128_16); \ __ret_165 = __noswap_vcmlaq_rot180_f16(__rev0_165, __rev1_165, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165)})); \ - __ret_165 = __builtin_shufflevector(__ret_165, __ret_165, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_165 = __builtin_shufflevector(__ret_165, __ret_165, __lane_reverse_128_16); \ __ret_165; \ }) #endif @@ -36074,22 +36095,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot270_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot270_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot270_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #endif @@ -36097,22 +36118,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, 
__builtin_neon_vcmla_rot270_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcmla_rot270_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot270_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot270_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #endif @@ -36132,11 +36153,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_167 = __p0_167; \ float16x4_t __s1_167 = __p1_167; \ float16x4_t __s2_167 = __p2_167; \ - float16x4_t __rev0_167; __rev0_167 = __builtin_shufflevector(__s0_167, __s0_167, 3, 2, 1, 0); \ - float16x4_t __rev1_167; __rev1_167 = __builtin_shufflevector(__s1_167, __s1_167, 3, 2, 1, 0); \ - float16x4_t __rev2_167; __rev2_167 = __builtin_shufflevector(__s2_167, __s2_167, 3, 2, 1, 0); \ + float16x4_t __rev0_167; __rev0_167 = __builtin_shufflevector(__s0_167, __s0_167, __lane_reverse_64_16); \ + float16x4_t __rev1_167; __rev1_167 = __builtin_shufflevector(__s1_167, __s1_167, __lane_reverse_64_16); \ + float16x4_t __rev2_167; __rev2_167 = __builtin_shufflevector(__s2_167, __s2_167, __lane_reverse_64_16); \ __ret_167 = __noswap_vcmla_rot270_f16(__rev0_167, __rev1_167, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_167), __p3_167), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_167), __p3_167)})); \ - __ret_167 = __builtin_shufflevector(__ret_167, __ret_167, 3, 2, 1, 0); \ + __ret_167 = __builtin_shufflevector(__ret_167, __ret_167, __lane_reverse_64_16); \ __ret_167; \ }) #endif @@ -36156,11 +36177,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x8_t __s0_169 = __p0_169; \ float16x8_t __s1_169 = __p1_169; \ float16x4_t __s2_169 = __p2_169; \ - float16x8_t __rev0_169; __rev0_169 = __builtin_shufflevector(__s0_169, __s0_169, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_169; __rev1_169 = __builtin_shufflevector(__s1_169, __s1_169, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_169; __rev2_169 = 
__builtin_shufflevector(__s2_169, __s2_169, 3, 2, 1, 0); \ + float16x8_t __rev0_169; __rev0_169 = __builtin_shufflevector(__s0_169, __s0_169, __lane_reverse_128_16); \ + float16x8_t __rev1_169; __rev1_169 = __builtin_shufflevector(__s1_169, __s1_169, __lane_reverse_128_16); \ + float16x4_t __rev2_169; __rev2_169 = __builtin_shufflevector(__s2_169, __s2_169, __lane_reverse_64_16); \ __ret_169 = __noswap_vcmlaq_rot270_f16(__rev0_169, __rev1_169, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169)})); \ - __ret_169 = __builtin_shufflevector(__ret_169, __ret_169, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_169 = __builtin_shufflevector(__ret_169, __ret_169, __lane_reverse_128_16); \ __ret_169; \ }) #endif @@ -36180,11 +36201,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_171 = __p0_171; \ float16x4_t __s1_171 = __p1_171; \ float16x8_t __s2_171 = __p2_171; \ - float16x4_t __rev0_171; __rev0_171 = __builtin_shufflevector(__s0_171, __s0_171, 3, 2, 1, 0); \ - float16x4_t __rev1_171; __rev1_171 = __builtin_shufflevector(__s1_171, __s1_171, 3, 2, 1, 0); \ - float16x8_t __rev2_171; __rev2_171 = __builtin_shufflevector(__s2_171, __s2_171, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev0_171; __rev0_171 = __builtin_shufflevector(__s0_171, __s0_171, __lane_reverse_64_16); \ + float16x4_t __rev1_171; __rev1_171 = __builtin_shufflevector(__s1_171, __s1_171, __lane_reverse_64_16); \ + float16x8_t __rev2_171; __rev2_171 = __builtin_shufflevector(__s2_171, __s2_171, __lane_reverse_128_16); \ __ret_171 = __noswap_vcmla_rot270_f16(__rev0_171, __rev1_171, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_171), __p3_171), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_171), __p3_171)})); \ - __ret_171 = __builtin_shufflevector(__ret_171, __ret_171, 3, 2, 1, 0); \ + __ret_171 = __builtin_shufflevector(__ret_171, __ret_171, __lane_reverse_64_16); \ __ret_171; \ }) #endif @@ -36204,11 +36225,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x8_t __s0_173 = __p0_173; \ float16x8_t __s1_173 = __p1_173; \ float16x8_t __s2_173 = __p2_173; \ - float16x8_t __rev0_173; __rev0_173 = __builtin_shufflevector(__s0_173, __s0_173, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_173; __rev1_173 = __builtin_shufflevector(__s1_173, __s1_173, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_173; __rev2_173 = __builtin_shufflevector(__s2_173, __s2_173, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0_173; __rev0_173 = __builtin_shufflevector(__s0_173, __s0_173, __lane_reverse_128_16); \ + float16x8_t __rev1_173; __rev1_173 = __builtin_shufflevector(__s1_173, __s1_173, __lane_reverse_128_16); \ + float16x8_t __rev2_173; __rev2_173 = __builtin_shufflevector(__s2_173, __s2_173, __lane_reverse_128_16); \ __ret_173 = __noswap_vcmlaq_rot270_f16(__rev0_173, __rev1_173, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173), 
__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173)})); \ - __ret_173 = __builtin_shufflevector(__ret_173, __ret_173, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_173 = __builtin_shufflevector(__ret_173, __ret_173, __lane_reverse_128_16); \ __ret_173; \ }) #endif @@ -36216,22 +36237,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot90_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot90_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcmlaq_rot90_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 40)); return __ret; } #endif @@ -36239,22 +36260,22 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x8_t __noswap_vcmlaq_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot90_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #else __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 
1, 0); - __ret = (float16x4_t) __builtin_neon_vcmla_rot90_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot90_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcmla_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcmla_rot90_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 8)); return __ret; } #endif @@ -36274,11 +36295,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_175 = __p0_175; \ float16x4_t __s1_175 = __p1_175; \ float16x4_t __s2_175 = __p2_175; \ - float16x4_t __rev0_175; __rev0_175 = __builtin_shufflevector(__s0_175, __s0_175, 3, 2, 1, 0); \ - float16x4_t __rev1_175; __rev1_175 = __builtin_shufflevector(__s1_175, __s1_175, 3, 2, 1, 0); \ - float16x4_t __rev2_175; __rev2_175 = __builtin_shufflevector(__s2_175, __s2_175, 3, 2, 1, 0); \ + float16x4_t __rev0_175; __rev0_175 = __builtin_shufflevector(__s0_175, __s0_175, __lane_reverse_64_16); \ + float16x4_t __rev1_175; __rev1_175 = __builtin_shufflevector(__s1_175, __s1_175, __lane_reverse_64_16); \ + float16x4_t __rev2_175; __rev2_175 = __builtin_shufflevector(__s2_175, __s2_175, __lane_reverse_64_16); \ __ret_175 = __noswap_vcmla_rot90_f16(__rev0_175, __rev1_175, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_175), __p3_175), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_175), __p3_175)})); \ - __ret_175 = __builtin_shufflevector(__ret_175, __ret_175, 3, 2, 1, 0); \ + __ret_175 = __builtin_shufflevector(__ret_175, __ret_175, __lane_reverse_64_16); \ __ret_175; \ }) #endif @@ -36298,11 +36319,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x8_t __s0_177 = __p0_177; \ float16x8_t __s1_177 = __p1_177; \ float16x4_t __s2_177 = __p2_177; \ - float16x8_t __rev0_177; __rev0_177 = __builtin_shufflevector(__s0_177, __s0_177, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_177; __rev1_177 = __builtin_shufflevector(__s1_177, __s1_177, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_177; __rev2_177 = __builtin_shufflevector(__s2_177, __s2_177, 3, 2, 1, 0); \ + float16x8_t __rev0_177; __rev0_177 = __builtin_shufflevector(__s0_177, __s0_177, __lane_reverse_128_16); \ + float16x8_t __rev1_177; __rev1_177 = __builtin_shufflevector(__s1_177, __s1_177, __lane_reverse_128_16); \ + float16x4_t __rev2_177; __rev2_177 = __builtin_shufflevector(__s2_177, __s2_177, __lane_reverse_64_16); \ __ret_177 = __noswap_vcmlaq_rot90_f16(__rev0_177, __rev1_177, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177), 
__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177)})); \ - __ret_177 = __builtin_shufflevector(__ret_177, __ret_177, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_177 = __builtin_shufflevector(__ret_177, __ret_177, __lane_reverse_128_16); \ __ret_177; \ }) #endif @@ -36322,11 +36343,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_179 = __p0_179; \ float16x4_t __s1_179 = __p1_179; \ float16x8_t __s2_179 = __p2_179; \ - float16x4_t __rev0_179; __rev0_179 = __builtin_shufflevector(__s0_179, __s0_179, 3, 2, 1, 0); \ - float16x4_t __rev1_179; __rev1_179 = __builtin_shufflevector(__s1_179, __s1_179, 3, 2, 1, 0); \ - float16x8_t __rev2_179; __rev2_179 = __builtin_shufflevector(__s2_179, __s2_179, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev0_179; __rev0_179 = __builtin_shufflevector(__s0_179, __s0_179, __lane_reverse_64_16); \ + float16x4_t __rev1_179; __rev1_179 = __builtin_shufflevector(__s1_179, __s1_179, __lane_reverse_64_16); \ + float16x8_t __rev2_179; __rev2_179 = __builtin_shufflevector(__s2_179, __s2_179, __lane_reverse_128_16); \ __ret_179 = __noswap_vcmla_rot90_f16(__rev0_179, __rev1_179, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_179), __p3_179), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_179), __p3_179)})); \ - __ret_179 = __builtin_shufflevector(__ret_179, __ret_179, 3, 2, 1, 0); \ + __ret_179 = __builtin_shufflevector(__ret_179, __ret_179, __lane_reverse_64_16); \ __ret_179; \ }) #endif @@ -36346,11 +36367,11 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x8_t __s0_181 = __p0_181; \ float16x8_t __s1_181 = __p1_181; \ float16x8_t __s2_181 = __p2_181; \ - float16x8_t __rev0_181; __rev0_181 = __builtin_shufflevector(__s0_181, __s0_181, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_181; __rev1_181 = __builtin_shufflevector(__s1_181, __s1_181, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_181; __rev2_181 = __builtin_shufflevector(__s2_181, __s2_181, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0_181; __rev0_181 = __builtin_shufflevector(__s0_181, __s0_181, __lane_reverse_128_16); \ + float16x8_t __rev1_181; __rev1_181 = __builtin_shufflevector(__s1_181, __s1_181, __lane_reverse_128_16); \ + float16x8_t __rev2_181; __rev2_181 = __builtin_shufflevector(__s2_181, __s2_181, __lane_reverse_128_16); \ __ret_181 = __noswap_vcmlaq_rot90_f16(__rev0_181, __rev1_181, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181)})); \ - __ret_181 = __builtin_shufflevector(__ret_181, __ret_181, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_181 = __builtin_shufflevector(__ret_181, __ret_181, __lane_reverse_128_16); \ __ret_181; \ }) #endif @@ -36358,16 +36379,16 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcadd_rot270_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) 
__builtin_neon_vcadd_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcadd_rot270_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcadd_rot270_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcadd_rot270_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcadd_rot270_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -36375,16 +36396,16 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcadd_rot270_f32(float32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcadd_rot90_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcadd_rot90_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcadd_rot90_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcadd_rot90_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcadd_rot90_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcadd_rot90_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -36392,16 +36413,16 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcadd_rot90_f32(float32x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcaddq_rot270_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcaddq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcaddq_rot270_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcaddq_rot270_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcaddq_rot270_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + 
float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcaddq_rot270_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -36409,16 +36430,16 @@ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcaddq_rot270_f32(float32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcaddq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcaddq_rot90_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcaddq_rot90_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcaddq_rot90_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -36426,22 +36447,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcaddq_rot90_f32(float32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcmlaq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_f32(float32x4_t __p0, 
float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -36449,22 +36470,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_f32(float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcmla_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -36484,11 +36505,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_f32(float3 float32x2_t __s0_183 = __p0_183; \ float32x2_t __s1_183 = __p1_183; \ float32x2_t __s2_183 = __p2_183; \ - float32x2_t __rev0_183; __rev0_183 = __builtin_shufflevector(__s0_183, __s0_183, 1, 0); \ - float32x2_t __rev1_183; __rev1_183 = __builtin_shufflevector(__s1_183, __s1_183, 1, 0); \ - float32x2_t __rev2_183; __rev2_183 = __builtin_shufflevector(__s2_183, __s2_183, 1, 0); \ + float32x2_t __rev0_183; __rev0_183 = __builtin_shufflevector(__s0_183, __s0_183, __lane_reverse_64_32); \ + float32x2_t __rev1_183; __rev1_183 = __builtin_shufflevector(__s1_183, __s1_183, __lane_reverse_64_32); \ + float32x2_t __rev2_183; __rev2_183 = __builtin_shufflevector(__s2_183, __s2_183, __lane_reverse_64_32); \ __ret_183 = __noswap_vcmla_f32(__rev0_183, __rev1_183, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_183), __p3_183)})); \ - __ret_183 = __builtin_shufflevector(__ret_183, __ret_183, 1, 0); \ + __ret_183 = 
__builtin_shufflevector(__ret_183, __ret_183, __lane_reverse_64_32); \ __ret_183; \ }) #endif @@ -36508,11 +36529,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_f32(float3 float32x4_t __s0_185 = __p0_185; \ float32x4_t __s1_185 = __p1_185; \ float32x2_t __s2_185 = __p2_185; \ - float32x4_t __rev0_185; __rev0_185 = __builtin_shufflevector(__s0_185, __s0_185, 3, 2, 1, 0); \ - float32x4_t __rev1_185; __rev1_185 = __builtin_shufflevector(__s1_185, __s1_185, 3, 2, 1, 0); \ - float32x2_t __rev2_185; __rev2_185 = __builtin_shufflevector(__s2_185, __s2_185, 1, 0); \ + float32x4_t __rev0_185; __rev0_185 = __builtin_shufflevector(__s0_185, __s0_185, __lane_reverse_128_32); \ + float32x4_t __rev1_185; __rev1_185 = __builtin_shufflevector(__s1_185, __s1_185, __lane_reverse_128_32); \ + float32x2_t __rev2_185; __rev2_185 = __builtin_shufflevector(__s2_185, __s2_185, __lane_reverse_64_32); \ __ret_185 = __noswap_vcmlaq_f32(__rev0_185, __rev1_185, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_185), __p3_185), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_185), __p3_185)})); \ - __ret_185 = __builtin_shufflevector(__ret_185, __ret_185, 3, 2, 1, 0); \ + __ret_185 = __builtin_shufflevector(__ret_185, __ret_185, __lane_reverse_128_32); \ __ret_185; \ }) #endif @@ -36532,11 +36553,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_f32(float3 float32x2_t __s0_187 = __p0_187; \ float32x2_t __s1_187 = __p1_187; \ float32x4_t __s2_187 = __p2_187; \ - float32x2_t __rev0_187; __rev0_187 = __builtin_shufflevector(__s0_187, __s0_187, 1, 0); \ - float32x2_t __rev1_187; __rev1_187 = __builtin_shufflevector(__s1_187, __s1_187, 1, 0); \ - float32x4_t __rev2_187; __rev2_187 = __builtin_shufflevector(__s2_187, __s2_187, 3, 2, 1, 0); \ + float32x2_t __rev0_187; __rev0_187 = __builtin_shufflevector(__s0_187, __s0_187, __lane_reverse_64_32); \ + float32x2_t __rev1_187; __rev1_187 = __builtin_shufflevector(__s1_187, __s1_187, __lane_reverse_64_32); \ + float32x4_t __rev2_187; __rev2_187 = __builtin_shufflevector(__s2_187, __s2_187, __lane_reverse_128_32); \ __ret_187 = __noswap_vcmla_f32(__rev0_187, __rev1_187, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_187), __p3_187)})); \ - __ret_187 = __builtin_shufflevector(__ret_187, __ret_187, 1, 0); \ + __ret_187 = __builtin_shufflevector(__ret_187, __ret_187, __lane_reverse_64_32); \ __ret_187; \ }) #endif @@ -36556,11 +36577,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_f32(float3 float32x4_t __s0_189 = __p0_189; \ float32x4_t __s1_189 = __p1_189; \ float32x4_t __s2_189 = __p2_189; \ - float32x4_t __rev0_189; __rev0_189 = __builtin_shufflevector(__s0_189, __s0_189, 3, 2, 1, 0); \ - float32x4_t __rev1_189; __rev1_189 = __builtin_shufflevector(__s1_189, __s1_189, 3, 2, 1, 0); \ - float32x4_t __rev2_189; __rev2_189 = __builtin_shufflevector(__s2_189, __s2_189, 3, 2, 1, 0); \ + float32x4_t __rev0_189; __rev0_189 = __builtin_shufflevector(__s0_189, __s0_189, __lane_reverse_128_32); \ + float32x4_t __rev1_189; __rev1_189 = __builtin_shufflevector(__s1_189, __s1_189, __lane_reverse_128_32); \ + float32x4_t __rev2_189; __rev2_189 = __builtin_shufflevector(__s2_189, __s2_189, __lane_reverse_128_32); \ __ret_189 = __noswap_vcmlaq_f32(__rev0_189, __rev1_189, __builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_189), __p3_189), 
__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_189), __p3_189)})); \ - __ret_189 = __builtin_shufflevector(__ret_189, __ret_189, 3, 2, 1, 0); \ + __ret_189 = __builtin_shufflevector(__ret_189, __ret_189, __lane_reverse_128_32); \ __ret_189; \ }) #endif @@ -36568,22 +36589,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_f32(float3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot180_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot180_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot180_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -36591,22 +36612,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_rot180_f3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_rot180_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot180_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcmla_rot180_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 
(int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot180_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_rot180_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot180_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -36626,11 +36647,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot180_f32 float32x2_t __s0_191 = __p0_191; \ float32x2_t __s1_191 = __p1_191; \ float32x2_t __s2_191 = __p2_191; \ - float32x2_t __rev0_191; __rev0_191 = __builtin_shufflevector(__s0_191, __s0_191, 1, 0); \ - float32x2_t __rev1_191; __rev1_191 = __builtin_shufflevector(__s1_191, __s1_191, 1, 0); \ - float32x2_t __rev2_191; __rev2_191 = __builtin_shufflevector(__s2_191, __s2_191, 1, 0); \ + float32x2_t __rev0_191; __rev0_191 = __builtin_shufflevector(__s0_191, __s0_191, __lane_reverse_64_32); \ + float32x2_t __rev1_191; __rev1_191 = __builtin_shufflevector(__s1_191, __s1_191, __lane_reverse_64_32); \ + float32x2_t __rev2_191; __rev2_191 = __builtin_shufflevector(__s2_191, __s2_191, __lane_reverse_64_32); \ __ret_191 = __noswap_vcmla_rot180_f32(__rev0_191, __rev1_191, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_191), __p3_191)})); \ - __ret_191 = __builtin_shufflevector(__ret_191, __ret_191, 1, 0); \ + __ret_191 = __builtin_shufflevector(__ret_191, __ret_191, __lane_reverse_64_32); \ __ret_191; \ }) #endif @@ -36650,11 +36671,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot180_f32 float32x4_t __s0_193 = __p0_193; \ float32x4_t __s1_193 = __p1_193; \ float32x2_t __s2_193 = __p2_193; \ - float32x4_t __rev0_193; __rev0_193 = __builtin_shufflevector(__s0_193, __s0_193, 3, 2, 1, 0); \ - float32x4_t __rev1_193; __rev1_193 = __builtin_shufflevector(__s1_193, __s1_193, 3, 2, 1, 0); \ - float32x2_t __rev2_193; __rev2_193 = __builtin_shufflevector(__s2_193, __s2_193, 1, 0); \ + float32x4_t __rev0_193; __rev0_193 = __builtin_shufflevector(__s0_193, __s0_193, __lane_reverse_128_32); \ + float32x4_t __rev1_193; __rev1_193 = __builtin_shufflevector(__s1_193, __s1_193, __lane_reverse_128_32); \ + float32x2_t __rev2_193; __rev2_193 = __builtin_shufflevector(__s2_193, __s2_193, __lane_reverse_64_32); \ __ret_193 = __noswap_vcmlaq_rot180_f32(__rev0_193, __rev1_193, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_193), __p3_193), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_193), __p3_193)})); \ - __ret_193 = __builtin_shufflevector(__ret_193, __ret_193, 3, 2, 1, 0); \ + __ret_193 = __builtin_shufflevector(__ret_193, __ret_193, __lane_reverse_128_32); \ __ret_193; \ }) #endif @@ -36674,11 
+36695,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot180_f32 float32x2_t __s0_195 = __p0_195; \ float32x2_t __s1_195 = __p1_195; \ float32x4_t __s2_195 = __p2_195; \ - float32x2_t __rev0_195; __rev0_195 = __builtin_shufflevector(__s0_195, __s0_195, 1, 0); \ - float32x2_t __rev1_195; __rev1_195 = __builtin_shufflevector(__s1_195, __s1_195, 1, 0); \ - float32x4_t __rev2_195; __rev2_195 = __builtin_shufflevector(__s2_195, __s2_195, 3, 2, 1, 0); \ + float32x2_t __rev0_195; __rev0_195 = __builtin_shufflevector(__s0_195, __s0_195, __lane_reverse_64_32); \ + float32x2_t __rev1_195; __rev1_195 = __builtin_shufflevector(__s1_195, __s1_195, __lane_reverse_64_32); \ + float32x4_t __rev2_195; __rev2_195 = __builtin_shufflevector(__s2_195, __s2_195, __lane_reverse_128_32); \ __ret_195 = __noswap_vcmla_rot180_f32(__rev0_195, __rev1_195, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_195), __p3_195)})); \ - __ret_195 = __builtin_shufflevector(__ret_195, __ret_195, 1, 0); \ + __ret_195 = __builtin_shufflevector(__ret_195, __ret_195, __lane_reverse_64_32); \ __ret_195; \ }) #endif @@ -36698,11 +36719,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot180_f32 float32x4_t __s0_197 = __p0_197; \ float32x4_t __s1_197 = __p1_197; \ float32x4_t __s2_197 = __p2_197; \ - float32x4_t __rev0_197; __rev0_197 = __builtin_shufflevector(__s0_197, __s0_197, 3, 2, 1, 0); \ - float32x4_t __rev1_197; __rev1_197 = __builtin_shufflevector(__s1_197, __s1_197, 3, 2, 1, 0); \ - float32x4_t __rev2_197; __rev2_197 = __builtin_shufflevector(__s2_197, __s2_197, 3, 2, 1, 0); \ + float32x4_t __rev0_197; __rev0_197 = __builtin_shufflevector(__s0_197, __s0_197, __lane_reverse_128_32); \ + float32x4_t __rev1_197; __rev1_197 = __builtin_shufflevector(__s1_197, __s1_197, __lane_reverse_128_32); \ + float32x4_t __rev2_197; __rev2_197 = __builtin_shufflevector(__s2_197, __s2_197, __lane_reverse_128_32); \ __ret_197 = __noswap_vcmlaq_rot180_f32(__rev0_197, __rev1_197, __builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_197), __p3_197), __noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_197), __p3_197)})); \ - __ret_197 = __builtin_shufflevector(__ret_197, __ret_197, 3, 2, 1, 0); \ + __ret_197 = __builtin_shufflevector(__ret_197, __ret_197, __lane_reverse_128_32); \ __ret_197; \ }) #endif @@ -36710,22 +36731,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot180_f32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot270_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 
(int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot270_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot270_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -36733,22 +36754,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_rot270_f3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot270_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcmla_rot270_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot270_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot270_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -36768,11 +36789,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot270_f32 float32x2_t __s0_199 = __p0_199; \ float32x2_t __s1_199 = __p1_199; \ float32x2_t __s2_199 = __p2_199; \ - float32x2_t __rev0_199; __rev0_199 = 
__builtin_shufflevector(__s0_199, __s0_199, 1, 0); \ - float32x2_t __rev1_199; __rev1_199 = __builtin_shufflevector(__s1_199, __s1_199, 1, 0); \ - float32x2_t __rev2_199; __rev2_199 = __builtin_shufflevector(__s2_199, __s2_199, 1, 0); \ + float32x2_t __rev0_199; __rev0_199 = __builtin_shufflevector(__s0_199, __s0_199, __lane_reverse_64_32); \ + float32x2_t __rev1_199; __rev1_199 = __builtin_shufflevector(__s1_199, __s1_199, __lane_reverse_64_32); \ + float32x2_t __rev2_199; __rev2_199 = __builtin_shufflevector(__s2_199, __s2_199, __lane_reverse_64_32); \ __ret_199 = __noswap_vcmla_rot270_f32(__rev0_199, __rev1_199, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_199), __p3_199)})); \ - __ret_199 = __builtin_shufflevector(__ret_199, __ret_199, 1, 0); \ + __ret_199 = __builtin_shufflevector(__ret_199, __ret_199, __lane_reverse_64_32); \ __ret_199; \ }) #endif @@ -36792,11 +36813,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot270_f32 float32x4_t __s0_201 = __p0_201; \ float32x4_t __s1_201 = __p1_201; \ float32x2_t __s2_201 = __p2_201; \ - float32x4_t __rev0_201; __rev0_201 = __builtin_shufflevector(__s0_201, __s0_201, 3, 2, 1, 0); \ - float32x4_t __rev1_201; __rev1_201 = __builtin_shufflevector(__s1_201, __s1_201, 3, 2, 1, 0); \ - float32x2_t __rev2_201; __rev2_201 = __builtin_shufflevector(__s2_201, __s2_201, 1, 0); \ + float32x4_t __rev0_201; __rev0_201 = __builtin_shufflevector(__s0_201, __s0_201, __lane_reverse_128_32); \ + float32x4_t __rev1_201; __rev1_201 = __builtin_shufflevector(__s1_201, __s1_201, __lane_reverse_128_32); \ + float32x2_t __rev2_201; __rev2_201 = __builtin_shufflevector(__s2_201, __s2_201, __lane_reverse_64_32); \ __ret_201 = __noswap_vcmlaq_rot270_f32(__rev0_201, __rev1_201, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_201), __p3_201), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_201), __p3_201)})); \ - __ret_201 = __builtin_shufflevector(__ret_201, __ret_201, 3, 2, 1, 0); \ + __ret_201 = __builtin_shufflevector(__ret_201, __ret_201, __lane_reverse_128_32); \ __ret_201; \ }) #endif @@ -36816,11 +36837,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot270_f32 float32x2_t __s0_203 = __p0_203; \ float32x2_t __s1_203 = __p1_203; \ float32x4_t __s2_203 = __p2_203; \ - float32x2_t __rev0_203; __rev0_203 = __builtin_shufflevector(__s0_203, __s0_203, 1, 0); \ - float32x2_t __rev1_203; __rev1_203 = __builtin_shufflevector(__s1_203, __s1_203, 1, 0); \ - float32x4_t __rev2_203; __rev2_203 = __builtin_shufflevector(__s2_203, __s2_203, 3, 2, 1, 0); \ + float32x2_t __rev0_203; __rev0_203 = __builtin_shufflevector(__s0_203, __s0_203, __lane_reverse_64_32); \ + float32x2_t __rev1_203; __rev1_203 = __builtin_shufflevector(__s1_203, __s1_203, __lane_reverse_64_32); \ + float32x4_t __rev2_203; __rev2_203 = __builtin_shufflevector(__s2_203, __s2_203, __lane_reverse_128_32); \ __ret_203 = __noswap_vcmla_rot270_f32(__rev0_203, __rev1_203, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_203), __p3_203)})); \ - __ret_203 = __builtin_shufflevector(__ret_203, __ret_203, 1, 0); \ + __ret_203 = __builtin_shufflevector(__ret_203, __ret_203, __lane_reverse_64_32); \ __ret_203; \ }) #endif @@ -36840,11 +36861,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot270_f32 float32x4_t __s0_205 = __p0_205; \ float32x4_t __s1_205 = __p1_205; \ 
float32x4_t __s2_205 = __p2_205; \ - float32x4_t __rev0_205; __rev0_205 = __builtin_shufflevector(__s0_205, __s0_205, 3, 2, 1, 0); \ - float32x4_t __rev1_205; __rev1_205 = __builtin_shufflevector(__s1_205, __s1_205, 3, 2, 1, 0); \ - float32x4_t __rev2_205; __rev2_205 = __builtin_shufflevector(__s2_205, __s2_205, 3, 2, 1, 0); \ + float32x4_t __rev0_205; __rev0_205 = __builtin_shufflevector(__s0_205, __s0_205, __lane_reverse_128_32); \ + float32x4_t __rev1_205; __rev1_205 = __builtin_shufflevector(__s1_205, __s1_205, __lane_reverse_128_32); \ + float32x4_t __rev2_205; __rev2_205 = __builtin_shufflevector(__s2_205, __s2_205, __lane_reverse_128_32); \ __ret_205 = __noswap_vcmlaq_rot270_f32(__rev0_205, __rev1_205, __builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_205), __p3_205), __noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_205), __p3_205)})); \ - __ret_205 = __builtin_shufflevector(__ret_205, __ret_205, 3, 2, 1, 0); \ + __ret_205 = __builtin_shufflevector(__ret_205, __ret_205, __lane_reverse_128_32); \ __ret_205; \ }) #endif @@ -36852,22 +36873,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot270_f32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot90_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x4_t vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_f32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot90_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcmlaq_rot90_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -36875,22 +36896,22 @@ __ai __attribute__((target("v8.3a,neon"))) float32x4_t __noswap_vcmlaq_rot90_f32 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_rot90_f32(float32x2_t __p0, float32x2_t 
__p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_rot90_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot90_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float32x2_t vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcmla_rot90_f32((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot90_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcmla_rot90_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcmla_rot90_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -36910,11 +36931,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot90_f32( float32x2_t __s0_207 = __p0_207; \ float32x2_t __s1_207 = __p1_207; \ float32x2_t __s2_207 = __p2_207; \ - float32x2_t __rev0_207; __rev0_207 = __builtin_shufflevector(__s0_207, __s0_207, 1, 0); \ - float32x2_t __rev1_207; __rev1_207 = __builtin_shufflevector(__s1_207, __s1_207, 1, 0); \ - float32x2_t __rev2_207; __rev2_207 = __builtin_shufflevector(__s2_207, __s2_207, 1, 0); \ + float32x2_t __rev0_207; __rev0_207 = __builtin_shufflevector(__s0_207, __s0_207, __lane_reverse_64_32); \ + float32x2_t __rev1_207; __rev1_207 = __builtin_shufflevector(__s1_207, __s1_207, __lane_reverse_64_32); \ + float32x2_t __rev2_207; __rev2_207 = __builtin_shufflevector(__s2_207, __s2_207, __lane_reverse_64_32); \ __ret_207 = __noswap_vcmla_rot90_f32(__rev0_207, __rev1_207, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_207), __p3_207)})); \ - __ret_207 = __builtin_shufflevector(__ret_207, __ret_207, 1, 0); \ + __ret_207 = __builtin_shufflevector(__ret_207, __ret_207, __lane_reverse_64_32); \ __ret_207; \ }) #endif @@ -36934,11 +36955,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot90_f32( float32x4_t __s0_209 = __p0_209; \ float32x4_t __s1_209 = __p1_209; \ float32x2_t __s2_209 = __p2_209; \ - float32x4_t __rev0_209; __rev0_209 = __builtin_shufflevector(__s0_209, __s0_209, 3, 2, 1, 0); \ - float32x4_t __rev1_209; __rev1_209 = __builtin_shufflevector(__s1_209, __s1_209, 3, 2, 1, 0); \ - float32x2_t __rev2_209; __rev2_209 = 
__builtin_shufflevector(__s2_209, __s2_209, 1, 0); \ + float32x4_t __rev0_209; __rev0_209 = __builtin_shufflevector(__s0_209, __s0_209, __lane_reverse_128_32); \ + float32x4_t __rev1_209; __rev1_209 = __builtin_shufflevector(__s1_209, __s1_209, __lane_reverse_128_32); \ + float32x2_t __rev2_209; __rev2_209 = __builtin_shufflevector(__s2_209, __s2_209, __lane_reverse_64_32); \ __ret_209 = __noswap_vcmlaq_rot90_f32(__rev0_209, __rev1_209, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_209), __p3_209), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_209), __p3_209)})); \ - __ret_209 = __builtin_shufflevector(__ret_209, __ret_209, 3, 2, 1, 0); \ + __ret_209 = __builtin_shufflevector(__ret_209, __ret_209, __lane_reverse_128_32); \ __ret_209; \ }) #endif @@ -36958,11 +36979,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot90_f32( float32x2_t __s0_211 = __p0_211; \ float32x2_t __s1_211 = __p1_211; \ float32x4_t __s2_211 = __p2_211; \ - float32x2_t __rev0_211; __rev0_211 = __builtin_shufflevector(__s0_211, __s0_211, 1, 0); \ - float32x2_t __rev1_211; __rev1_211 = __builtin_shufflevector(__s1_211, __s1_211, 1, 0); \ - float32x4_t __rev2_211; __rev2_211 = __builtin_shufflevector(__s2_211, __s2_211, 3, 2, 1, 0); \ + float32x2_t __rev0_211; __rev0_211 = __builtin_shufflevector(__s0_211, __s0_211, __lane_reverse_64_32); \ + float32x2_t __rev1_211; __rev1_211 = __builtin_shufflevector(__s1_211, __s1_211, __lane_reverse_64_32); \ + float32x4_t __rev2_211; __rev2_211 = __builtin_shufflevector(__s2_211, __s2_211, __lane_reverse_128_32); \ __ret_211 = __noswap_vcmla_rot90_f32(__rev0_211, __rev1_211, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_211), __p3_211)})); \ - __ret_211 = __builtin_shufflevector(__ret_211, __ret_211, 1, 0); \ + __ret_211 = __builtin_shufflevector(__ret_211, __ret_211, __lane_reverse_64_32); \ __ret_211; \ }) #endif @@ -36982,11 +37003,11 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot90_f32( float32x4_t __s0_213 = __p0_213; \ float32x4_t __s1_213 = __p1_213; \ float32x4_t __s2_213 = __p2_213; \ - float32x4_t __rev0_213; __rev0_213 = __builtin_shufflevector(__s0_213, __s0_213, 3, 2, 1, 0); \ - float32x4_t __rev1_213; __rev1_213 = __builtin_shufflevector(__s1_213, __s1_213, 3, 2, 1, 0); \ - float32x4_t __rev2_213; __rev2_213 = __builtin_shufflevector(__s2_213, __s2_213, 3, 2, 1, 0); \ + float32x4_t __rev0_213; __rev0_213 = __builtin_shufflevector(__s0_213, __s0_213, __lane_reverse_128_32); \ + float32x4_t __rev1_213; __rev1_213 = __builtin_shufflevector(__s1_213, __s1_213, __lane_reverse_128_32); \ + float32x4_t __rev2_213; __rev2_213 = __builtin_shufflevector(__s2_213, __s2_213, __lane_reverse_128_32); \ __ret_213 = __noswap_vcmlaq_rot90_f32(__rev0_213, __rev1_213, __builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_213), __p3_213), __noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_213), __p3_213)})); \ - __ret_213 = __builtin_shufflevector(__ret_213, __ret_213, 3, 2, 1, 0); \ + __ret_213 = __builtin_shufflevector(__ret_213, __ret_213, __lane_reverse_128_32); \ __ret_213; \ }) #endif @@ -36995,20 +37016,20 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot90_f32( #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __a32_vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; - __ret = 
(bfloat16x4_t) __builtin_neon___a32_vcvt_bf16_f32((int8x16_t)__p0, 11); + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon___a32_vcvt_bf16_f32(__builtin_bit_cast(int8x16_t, __p0), 11)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __a32_vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (bfloat16x4_t) __builtin_neon___a32_vcvt_bf16_f32((int8x16_t)__rev0, 11); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon___a32_vcvt_bf16_f32(__builtin_bit_cast(int8x16_t, __rev0), 11)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t __noswap___a32_vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t) __builtin_neon___a32_vcvt_bf16_f32((int8x16_t)__p0, 11); + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon___a32_vcvt_bf16_f32(__builtin_bit_cast(int8x16_t, __p0), 11)); return __ret; } #endif @@ -37022,9 +37043,9 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t #else __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __noswap___a32_vcvt_bf16_f32(__rev0); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -37038,10 +37059,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloa #else __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vcombine_bf16(__noswap___a32_vcvt_bf16_f32(__rev1), __noswap_vget_low_bf16(__rev0)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -37049,277 +37070,277 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; - __ret = vcombine_bf16((bfloat16x4_t)(0ULL), __a32_vcvt_bf16_f32(__p0)); + __ret = vcombine_bf16(__builtin_bit_cast(bfloat16x4_t, 0ULL), __a32_vcvt_bf16_f32(__p0)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = __noswap_vcombine_bf16((bfloat16x4_t)(0ULL), __noswap___a32_vcvt_bf16_f32(__rev0)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_128_32); + __ret = __noswap_vcombine_bf16(__builtin_bit_cast(bfloat16x4_t, 0ULL), __noswap___a32_vcvt_bf16_f32(__rev0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif __ai __attribute__((target("bf16,neon"))) poly8x8_t vreinterpret_p8_bf16(bfloat16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly64x1_t vreinterpret_p64_bf16(bfloat16x4_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly16x4_t vreinterpret_p16_bf16(bfloat16x4_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly8x16_t vreinterpretq_p8_bf16(bfloat16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly64x2_t vreinterpretq_p64_bf16(bfloat16x8_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly16x8_t vreinterpretq_p16_bf16(bfloat16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint8x16_t vreinterpretq_u8_bf16(bfloat16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint32x4_t vreinterpretq_u32_bf16(bfloat16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint64x2_t vreinterpretq_u64_bf16(bfloat16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint16x8_t vreinterpretq_u16_bf16(bfloat16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int8x16_t vreinterpretq_s8_bf16(bfloat16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x4_t vreinterpretq_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float16x8_t vreinterpretq_f16_bf16(bfloat16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int32x4_t vreinterpretq_s32_bf16(bfloat16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int64x2_t vreinterpretq_s64_bf16(bfloat16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int16x8_t vreinterpretq_s16_bf16(bfloat16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; 
} __ai __attribute__((target("bf16,neon"))) uint8x8_t vreinterpret_u8_bf16(bfloat16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint32x2_t vreinterpret_u32_bf16(bfloat16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint64x1_t vreinterpret_u64_bf16(bfloat16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint16x4_t vreinterpret_u16_bf16(bfloat16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int8x8_t vreinterpret_s8_bf16(bfloat16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x2_t vreinterpret_f32_bf16(bfloat16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float16x4_t vreinterpret_f16_bf16(bfloat16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int32x2_t vreinterpret_s32_bf16(bfloat16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int64x1_t vreinterpret_s64_bf16(bfloat16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int16x4_t vreinterpret_s16_bf16(bfloat16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_p8(poly8x16_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_p64(poly64x2_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_p16(poly16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u8(uint8x16_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u32(uint32x4_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u64(uint64x2_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u16(uint16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); 
return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_s8(int8x16_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_f16(float16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_s32(int32x4_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_s64(int64x2_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_s16(int16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_p8(poly8x8_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_p64(poly64x1_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_p16(poly16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u8(uint8x8_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u32(uint32x2_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u64(uint64x1_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u16(uint16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s8(int8x8_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_f32(float32x2_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_f16(float16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s32(int32x2_t __p0) { bfloat16x4_t __ret; - 
__ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s64(int64x1_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -37335,10 +37356,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int32x4_t __ret_215; \ int32x4_t __s0_215 = __p0_215; \ int32x2_t __s1_215 = __p1_215; \ - int32x4_t __rev0_215; __rev0_215 = __builtin_shufflevector(__s0_215, __s0_215, 3, 2, 1, 0); \ - int32x2_t __rev1_215; __rev1_215 = __builtin_shufflevector(__s1_215, __s1_215, 1, 0); \ + int32x4_t __rev0_215; __rev0_215 = __builtin_shufflevector(__s0_215, __s0_215, __lane_reverse_128_32); \ + int32x2_t __rev1_215; __rev1_215 = __builtin_shufflevector(__s1_215, __s1_215, __lane_reverse_64_32); \ __ret_215 = __noswap_vqdmulhq_s32(__rev0_215, __noswap_splatq_lane_s32(__rev1_215, __p2_215)); \ - __ret_215 = __builtin_shufflevector(__ret_215, __ret_215, 3, 2, 1, 0); \ + __ret_215 = __builtin_shufflevector(__ret_215, __ret_215, __lane_reverse_128_32); \ __ret_215; \ }) #endif @@ -37356,10 +37377,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int16x8_t __ret_217; \ int16x8_t __s0_217 = __p0_217; \ int16x4_t __s1_217 = __p1_217; \ - int16x8_t __rev0_217; __rev0_217 = __builtin_shufflevector(__s0_217, __s0_217, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1_217; __rev1_217 = __builtin_shufflevector(__s1_217, __s1_217, 3, 2, 1, 0); \ + int16x8_t __rev0_217; __rev0_217 = __builtin_shufflevector(__s0_217, __s0_217, __lane_reverse_128_16); \ + int16x4_t __rev1_217; __rev1_217 = __builtin_shufflevector(__s1_217, __s1_217, __lane_reverse_64_16); \ __ret_217 = __noswap_vqdmulhq_s16(__rev0_217, __noswap_splatq_lane_s16(__rev1_217, __p2_217)); \ - __ret_217 = __builtin_shufflevector(__ret_217, __ret_217, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_217 = __builtin_shufflevector(__ret_217, __ret_217, __lane_reverse_128_16); \ __ret_217; \ }) #endif @@ -37377,10 +37398,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int32x2_t __ret_219; \ int32x2_t __s0_219 = __p0_219; \ int32x2_t __s1_219 = __p1_219; \ - int32x2_t __rev0_219; __rev0_219 = __builtin_shufflevector(__s0_219, __s0_219, 1, 0); \ - int32x2_t __rev1_219; __rev1_219 = __builtin_shufflevector(__s1_219, __s1_219, 1, 0); \ + int32x2_t __rev0_219; __rev0_219 = __builtin_shufflevector(__s0_219, __s0_219, __lane_reverse_64_32); \ + int32x2_t __rev1_219; __rev1_219 = __builtin_shufflevector(__s1_219, __s1_219, __lane_reverse_64_32); \ __ret_219 = __noswap_vqdmulh_s32(__rev0_219, __noswap_splat_lane_s32(__rev1_219, __p2_219)); \ - __ret_219 = __builtin_shufflevector(__ret_219, __ret_219, 1, 0); \ + __ret_219 = __builtin_shufflevector(__ret_219, __ret_219, __lane_reverse_64_32); \ __ret_219; \ }) #endif @@ -37398,10 +37419,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int16x4_t __ret_221; \ int16x4_t __s0_221 = __p0_221; \ int16x4_t __s1_221 = __p1_221; \ - int16x4_t __rev0_221; __rev0_221 = __builtin_shufflevector(__s0_221, __s0_221, 3, 2, 1, 0); \ - int16x4_t __rev1_221; __rev1_221 = __builtin_shufflevector(__s1_221, 
__s1_221, 3, 2, 1, 0); \ + int16x4_t __rev0_221; __rev0_221 = __builtin_shufflevector(__s0_221, __s0_221, __lane_reverse_64_16); \ + int16x4_t __rev1_221; __rev1_221 = __builtin_shufflevector(__s1_221, __s1_221, __lane_reverse_64_16); \ __ret_221 = __noswap_vqdmulh_s16(__rev0_221, __noswap_splat_lane_s16(__rev1_221, __p2_221)); \ - __ret_221 = __builtin_shufflevector(__ret_221, __ret_221, 3, 2, 1, 0); \ + __ret_221 = __builtin_shufflevector(__ret_221, __ret_221, __lane_reverse_64_16); \ __ret_221; \ }) #endif @@ -37419,10 +37440,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int32x4_t __ret_223; \ int32x4_t __s0_223 = __p0_223; \ int32x2_t __s1_223 = __p1_223; \ - int32x4_t __rev0_223; __rev0_223 = __builtin_shufflevector(__s0_223, __s0_223, 3, 2, 1, 0); \ - int32x2_t __rev1_223; __rev1_223 = __builtin_shufflevector(__s1_223, __s1_223, 1, 0); \ + int32x4_t __rev0_223; __rev0_223 = __builtin_shufflevector(__s0_223, __s0_223, __lane_reverse_128_32); \ + int32x2_t __rev1_223; __rev1_223 = __builtin_shufflevector(__s1_223, __s1_223, __lane_reverse_64_32); \ __ret_223 = __noswap_vqrdmulhq_s32(__rev0_223, __noswap_splatq_lane_s32(__rev1_223, __p2_223)); \ - __ret_223 = __builtin_shufflevector(__ret_223, __ret_223, 3, 2, 1, 0); \ + __ret_223 = __builtin_shufflevector(__ret_223, __ret_223, __lane_reverse_128_32); \ __ret_223; \ }) #endif @@ -37440,10 +37461,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int16x8_t __ret_225; \ int16x8_t __s0_225 = __p0_225; \ int16x4_t __s1_225 = __p1_225; \ - int16x8_t __rev0_225; __rev0_225 = __builtin_shufflevector(__s0_225, __s0_225, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1_225; __rev1_225 = __builtin_shufflevector(__s1_225, __s1_225, 3, 2, 1, 0); \ + int16x8_t __rev0_225; __rev0_225 = __builtin_shufflevector(__s0_225, __s0_225, __lane_reverse_128_16); \ + int16x4_t __rev1_225; __rev1_225 = __builtin_shufflevector(__s1_225, __s1_225, __lane_reverse_64_16); \ __ret_225 = __noswap_vqrdmulhq_s16(__rev0_225, __noswap_splatq_lane_s16(__rev1_225, __p2_225)); \ - __ret_225 = __builtin_shufflevector(__ret_225, __ret_225, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_225 = __builtin_shufflevector(__ret_225, __ret_225, __lane_reverse_128_16); \ __ret_225; \ }) #endif @@ -37461,10 +37482,10 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int32x2_t __ret_227; \ int32x2_t __s0_227 = __p0_227; \ int32x2_t __s1_227 = __p1_227; \ - int32x2_t __rev0_227; __rev0_227 = __builtin_shufflevector(__s0_227, __s0_227, 1, 0); \ - int32x2_t __rev1_227; __rev1_227 = __builtin_shufflevector(__s1_227, __s1_227, 1, 0); \ + int32x2_t __rev0_227; __rev0_227 = __builtin_shufflevector(__s0_227, __s0_227, __lane_reverse_64_32); \ + int32x2_t __rev1_227; __rev1_227 = __builtin_shufflevector(__s1_227, __s1_227, __lane_reverse_64_32); \ __ret_227 = __noswap_vqrdmulh_s32(__rev0_227, __noswap_splat_lane_s32(__rev1_227, __p2_227)); \ - __ret_227 = __builtin_shufflevector(__ret_227, __ret_227, 1, 0); \ + __ret_227 = __builtin_shufflevector(__ret_227, __ret_227, __lane_reverse_64_32); \ __ret_227; \ }) #endif @@ -37482,1332 +37503,1332 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int int16x4_t __ret_229; \ int16x4_t __s0_229 = __p0_229; \ int16x4_t __s1_229 = __p1_229; \ - int16x4_t __rev0_229; __rev0_229 = __builtin_shufflevector(__s0_229, __s0_229, 3, 2, 1, 0); \ - int16x4_t __rev1_229; __rev1_229 = __builtin_shufflevector(__s1_229, __s1_229, 3, 2, 1, 0); \ + 
int16x4_t __rev0_229; __rev0_229 = __builtin_shufflevector(__s0_229, __s0_229, __lane_reverse_64_16); \ + int16x4_t __rev1_229; __rev1_229 = __builtin_shufflevector(__s1_229, __s1_229, __lane_reverse_64_16); \ __ret_229 = __noswap_vqrdmulh_s16(__rev0_229, __noswap_splat_lane_s16(__rev1_229, __p2_229)); \ - __ret_229 = __builtin_shufflevector(__ret_229, __ret_229, 3, 2, 1, 0); \ + __ret_229 = __builtin_shufflevector(__ret_229, __ret_229, __lane_reverse_64_16); \ __ret_229; \ }) #endif __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { poly16x4_t __ret; - __ret = 
(poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t 
vreinterpretq_p16_p8(poly8x16_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; 
} __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = 
__builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { 
uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t 
vreinterpretq_f32_u16(uint16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { int32x4_t __ret; - __ret = 
(int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { int64x2_t __ret; - __ret = 
(int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + 
__ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + 
__ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { uint16x4_t __ret; 
- __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = 
__builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) 
float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t 
vreinterpret_s64_u16(uint16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } #endif @@ -38815,20 +38836,20 @@ __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s64(int64x1_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float16x4_t vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) 
__builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_f16_f32(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float16x4_t vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_f16_f32(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } __ai __attribute__((target("neon"))) float16x4_t __noswap_vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vcvt_f16_f32(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #endif @@ -38836,20 +38857,20 @@ __ai __attribute__((target("neon"))) float16x4_t __noswap_vcvt_f16_f32(float32x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvt_f32_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvt_f32_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vcvt_f32_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #endif @@ -38857,14 +38878,14 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vld1q_v(__p0, 40)); \ __ret; \ }) #else #define vld1q_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_v(__p0, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vld1q_v(__p0, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -38872,14 +38893,14 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vld1_v(__p0, 8)); \ __ret; \ }) #else 
#define vld1_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_v(__p0, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vld1_v(__p0, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -38887,14 +38908,14 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vld1q_dup_v(__p0, 40)); \ __ret; \ }) #else #define vld1q_dup_f16(__p0) __extension__ ({ \ float16x8_t __ret; \ - __ret = (float16x8_t) __builtin_neon_vld1q_dup_v(__p0, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vld1q_dup_v(__p0, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -38902,14 +38923,14 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #ifdef __LITTLE_ENDIAN__ #define vld1_dup_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vld1_dup_v(__p0, 8)); \ __ret; \ }) #else #define vld1_dup_f16(__p0) __extension__ ({ \ float16x4_t __ret; \ - __ret = (float16x4_t) __builtin_neon_vld1_dup_v(__p0, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vld1_dup_v(__p0, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -38918,16 +38939,16 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 40)); \ __ret; \ }) #else #define vld1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ float16x8_t __s1 = __p1; \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -38936,16 +38957,16 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s1 = __p1; \ - __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 8)); \ __ret; \ }) #else #define vld1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __ret; \ float16x4_t __s1 = __p1; \ - float16x4_t __rev1; __rev1 = 
__builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -38961,8 +38982,8 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -38978,8 +38999,8 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -38995,9 +39016,9 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39013,9 +39034,9 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39031,10 +39052,10 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = 
__builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39050,10 +39071,10 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39069,8 +39090,8 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39086,8 +39107,8 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39103,8 +39124,8 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39120,8 +39141,8 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39130,7 +39151,7 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __ret; \ float16x8x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 40); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 40); \ __ret; \ }) #else @@ -39138,12 +39159,12 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x2_t __ret; \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39152,7 +39173,7 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __ret; \ float16x4x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 8); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 8); \ __ret; \ }) #else @@ -39160,12 +39181,12 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x2_t __ret; \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39181,9 
+39202,9 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39199,9 +39220,9 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39217,9 +39238,9 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39235,9 +39256,9 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39246,7 +39267,7 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __ret; \ float16x8x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 40); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 40); \ __ret; \ }) #else @@ -39254,14 +39275,14 
@@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x3_t __ret; \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39270,7 +39291,7 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __ret; \ float16x4x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 8); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 8); \ __ret; \ }) #else @@ -39278,14 +39299,14 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x3_t __ret; \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 
__lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39301,10 +39322,10 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39320,10 +39341,10 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39339,10 +39360,10 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39358,10 +39379,10 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], 
__ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39370,7 +39391,7 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __ret; \ float16x8x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 40); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 40); \ __ret; \ }) #else @@ -39378,16 +39399,16 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x8x4_t __ret; \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 40); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_16); \ __ret; \ }) #endif @@ -39396,7 +39417,7 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #define vld4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __ret; \ float16x4x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 8); \ + 
__builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 8); \ __ret; \ }) #else @@ -39404,16 +39425,16 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 float16x4x4_t __ret; \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 8); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_16); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_16); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_16); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_16); \ __ret; \ }) #endif @@ -39421,340 +39442,340 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #ifdef __LITTLE_ENDIAN__ #define vst1q_f16(__p0, __p1) __extension__ ({ \ float16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 40); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 40); \ }) #else #define vst1q_f16(__p0, __p1) __extension__ ({ \ float16x8_t __s1 = __p1; \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 40); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16(__p0, __p1) __extension__ ({ \ float16x4_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 8); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 8); \ }) #else #define vst1_f16(__p0, __p1) __extension__ ({ \ float16x4_t __s1 = __p1; \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__rev1, 8); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + 
__builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 40); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 40); \ }) #else #define vst1q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __s1 = __p1; \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 40); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 8); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 8); \ }) #else #define vst1_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4_t __s1 = __p1; \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__rev1, __p2, 8); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 40); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 40); \ }) #else #define vst1q_f16_x2(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x2(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 8); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 8); \ }) #else #define vst1_f16_x2(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 8); \ }) #endif #ifdef 
__LITTLE_ENDIAN__ #define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 40); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 40); \ }) #else #define vst1q_f16_x3(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x3(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 8); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 8); \ }) #else #define vst1_f16_x3(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 40); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 40); \ }) #else #define vst1q_f16_x4(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - 
__rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1_f16_x4(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 8); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 8); \ }) #else #define vst1_f16_x4(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_f16(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 40); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 40); \ }) #else #define vst2q_f16(__p0, __p1) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define 
vst2_f16(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 8); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 8); \ }) #else #define vst2_f16(__p0, __p1) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 40); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 40); \ }) #else #define vst2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x2_t __s1 = __p1; \ float16x8x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 8); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 8); \ }) #else #define vst2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x2_t __s1 = __p1; \ float16x4x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], __p2, 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_f16(__p0, __p1) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 40); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 40); \ }) #else #define vst3q_f16(__p0, __p1) __extension__ ({ \ 
float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_f16(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 8); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 8); \ }) #else #define vst3_f16(__p0, __p1) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 40); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 40); \ }) #else #define vst3q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x3_t __s1 = __p1; \ float16x8x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), 
__builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 8); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 8); \ }) #else #define vst3_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x3_t __s1 = __p1; \ float16x4x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], __p2, 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_f16(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 40); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 40); \ }) #else #define vst4q_f16(__p0, __p1) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_f16(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 8); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, 
__s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 8); \ }) #else #define vst4_f16(__p0, __p1) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ float16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 8); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 40); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 40); \ }) #else #define vst4q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8x4_t __s1 = __p1; \ float16x8x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 40); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_16); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 40); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 8); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 8); \ }) #else #define vst4_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16x4x4_t __s1 
= __p1; \ float16x4x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 3, 2, 1, 0); \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__rev1.val[0], (int8x8_t)__rev1.val[1], (int8x8_t)__rev1.val[2], (int8x8_t)__rev1.val[3], __p2, 8); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_16); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_16); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_16); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_16); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 8); \ }) #endif @@ -39763,45 +39784,45 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vcvt_f32_f16(float16x4 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vmaxnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vmaxnmq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vmaxnm_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vmaxnm_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vminnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) 
__builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vminnmq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vminnm_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vminnm_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } #endif @@ -39809,148 +39830,148 @@ __ai __attribute__((target("neon"))) float64x1_t vminnm_f64(float64x1_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrndq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrndq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrnd_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrnd_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrndaq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndaq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrndaq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndaq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrnda_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrnda_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrndiq_f64(float64x2_t __p0) { 
float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndiq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrndiq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndiq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrndi_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrndi_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrndmq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndmq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrndmq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndmq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrndm_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrndm_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrndnq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndnq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrndnq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndnq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrndn_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrndn_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ 
__ai __attribute__((target("neon"))) float64x2_t vrndpq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndpq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrndpq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndpq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrndp_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrndp_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrndxq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndxq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrndxq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrndxq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrndx_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrndx_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #endif @@ -39958,16 +39979,16 @@ __ai __attribute__((target("neon"))) float64x1_t vrndx_f64(float64x1_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes,neon"))) uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesdq_u8((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaesdq_u8(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("aes,neon"))) uint8x16_t vaesdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vaesdq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + 
uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaesdq_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -39975,16 +39996,16 @@ __ai __attribute__((target("aes,neon"))) uint8x16_t vaesdq_u8(uint8x16_t __p0, u #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes,neon"))) uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaeseq_u8((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaeseq_u8(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("aes,neon"))) uint8x16_t vaeseq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vaeseq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaeseq_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -39992,15 +40013,15 @@ __ai __attribute__((target("aes,neon"))) uint8x16_t vaeseq_u8(uint8x16_t __p0, u #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes,neon"))) uint8x16_t vaesimcq_u8(uint8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesimcq_u8((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaesimcq_u8(__builtin_bit_cast(int8x16_t, __p0), 48)); return __ret; } #else __ai __attribute__((target("aes,neon"))) uint8x16_t vaesimcq_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vaesimcq_u8((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaesimcq_u8(__builtin_bit_cast(int8x16_t, __rev0), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -40008,15 +40029,15 @@ __ai __attribute__((target("aes,neon"))) uint8x16_t vaesimcq_u8(uint8x16_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes,neon"))) uint8x16_t vaesmcq_u8(uint8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vaesmcq_u8((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaesmcq_u8(__builtin_bit_cast(int8x16_t, __p0), 48)); return __ret; } #else __ai __attribute__((target("aes,neon"))) uint8x16_t vaesmcq_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vaesmcq_u8((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vaesmcq_u8(__builtin_bit_cast(int8x16_t, __rev0), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -40024,15 +40045,15 @@ __ai __attribute__((target("aes,neon"))) uint8x16_t vaesmcq_u8(uint8x16_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vcvtaq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtaq_s32_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vcvtaq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vcvtaq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtaq_s32_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40040,15 +40061,15 @@ __ai __attribute__((target("neon"))) int32x4_t vcvtaq_s32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vcvta_s32_f32(float32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvta_s32_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vcvta_s32_f32(float32x2_t __p0) { int32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vcvta_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvta_s32_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40056,15 +40077,15 @@ __ai __attribute__((target("neon"))) int32x2_t vcvta_s32_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtaq_u32_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcvtaq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtaq_u32_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40072,15 +40093,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcvtaq_u32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcvta_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvta_u32_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcvta_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcvta_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvta_u32_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40088,15 +40109,15 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvta_u32_f32(float32x2_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vcvtmq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtmq_s32_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vcvtmq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vcvtmq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtmq_s32_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40104,15 +40125,15 @@ __ai __attribute__((target("neon"))) int32x4_t vcvtmq_s32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vcvtm_s32_f32(float32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvtm_s32_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vcvtm_s32_f32(float32x2_t __p0) { int32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vcvtm_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvtm_s32_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40120,15 +40141,15 @@ __ai __attribute__((target("neon"))) int32x2_t vcvtm_s32_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, 
__builtin_neon_vcvtmq_u32_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcvtmq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtmq_u32_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40136,15 +40157,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcvtmq_u32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcvtm_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvtm_u32_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcvtm_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcvtm_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvtm_u32_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40152,15 +40173,15 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvtm_u32_f32(float32x2_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vcvtnq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtnq_s32_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vcvtnq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vcvtnq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtnq_s32_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40168,15 +40189,15 @@ __ai __attribute__((target("neon"))) int32x4_t vcvtnq_s32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vcvtn_s32_f32(float32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvtn_s32_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vcvtn_s32_f32(float32x2_t __p0) { int32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vcvtn_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvtn_s32_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40184,15 +40205,15 @@ __ai __attribute__((target("neon"))) int32x2_t vcvtn_s32_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtnq_u32_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcvtnq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtnq_u32_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40200,15 +40221,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcvtnq_u32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcvtn_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvtn_u32_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcvtn_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcvtn_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvtn_u32_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40216,15 +40237,15 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvtn_u32_f32(float32x2_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vcvtpq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__p0, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtpq_s32_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vcvtpq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vcvtpq_s32_v((int8x16_t)__rev0, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vcvtpq_s32_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40232,15 +40253,15 @@ __ai __attribute__((target("neon"))) int32x4_t vcvtpq_s32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) int32x2_t vcvtp_s32_f32(float32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__p0, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvtp_s32_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vcvtp_s32_f32(float32x2_t __p0) { int32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32x2_t) __builtin_neon_vcvtp_s32_v((int8x8_t)__rev0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vcvtp_s32_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40248,15 +40269,15 @@ __ai __attribute__((target("neon"))) int32x2_t vcvtp_s32_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtpq_u32_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcvtpq_u32_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcvtpq_u32_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40264,15 +40285,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcvtpq_u32_f32(float32x4_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcvtp_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvtp_u32_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcvtp_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcvtp_u32_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcvtp_u32_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40280,38 +40301,38 @@ __ai __attribute__((target("neon"))) uint32x2_t vcvtp_u32_f32(float32x2_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__p0, __p1, __p2); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1cq_u32(__p0, __p1, __p2)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1cq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - 
uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha1cq_u32(__rev0, __p1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1cq_u32(__rev0, __p1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif __ai __attribute__((target("sha2,neon"))) uint32_t vsha1h_u32(uint32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsha1h_u32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vsha1h_u32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__p0, __p1, __p2); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1mq_u32(__p0, __p1, __p2)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1mq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha1mq_u32(__rev0, __p1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1mq_u32(__rev0, __p1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40319,16 +40340,16 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1mq_u32(uint32x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__p0, __p1, __p2); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1pq_u32(__p0, __p1, __p2)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1pq_u32(uint32x4_t __p0, uint32_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha1pq_u32(__rev0, __p1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1pq_u32(__rev0, __p1, __rev2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40336,17 +40357,17 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1pq_u32(uint32x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = 
(uint32x4_t) __builtin_neon_vsha1su0q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1su0q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1su0q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha1su0q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1su0q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40354,16 +40375,16 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1su0q_u32(uint32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha1su1q_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1su1q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1su1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha1su1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha1su1q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40371,17 +40392,17 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha1su1q_u32(uint32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha256hq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256hq_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256hq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha256hq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256hq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40389,17 +40410,17 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256hq_u32(uint32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha256h2q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256h2q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256h2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha256h2q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256h2q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40407,16 +40428,16 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256h2q_u32(uint32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha256su0q_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256su0q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256su0q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha256su0q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256su0q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40424,17 +40445,17 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256su0q_u32(uint32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsha256su1q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256su1q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256su1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsha256su1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsha256su1q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40444,15 +40465,15 @@ __ai __attribute__((target("sha2,neon"))) uint32x4_t vsha256su1q_u32(uint32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrndq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrndq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40460,15 +40481,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrnd_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) 
__builtin_neon_vrnd_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrnd_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrnd_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrnd_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrnd_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40476,15 +40497,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrnd_f16(float16x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndaq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrndaq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndaq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndaq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrndaq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndaq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40492,15 +40513,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndaq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrnda_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrnda_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrnda_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrnda_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrnda_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrnda_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40508,15 +40529,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrnda_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndmq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrndmq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndmq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndmq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 
4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrndmq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndmq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40524,15 +40545,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndmq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndm_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrndm_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndm_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndm_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrndm_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndm_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40540,15 +40561,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndm_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndnq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrndnq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndnq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndnq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrndnq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndnq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40556,15 +40577,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndnq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndn_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrndn_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndn_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndn_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrndn_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndn_f16(__builtin_bit_cast(int8x8_t, __rev0), 
8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40572,15 +40593,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndn_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndpq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrndpq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndpq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndpq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrndpq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndpq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40588,15 +40609,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndpq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndp_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrndp_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndp_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndp_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrndp_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndp_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40604,15 +40625,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndp_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndxq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrndxq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndxq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndxq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrndxq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndxq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40620,15 +40641,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndxq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndx_f16(float16x4_t __p0) { 
float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrndx_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndx_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndx_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrndx_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndx_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40636,15 +40657,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndx_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrndq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrndq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrndq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40652,15 +40673,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrndq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrnd_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrnd_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrnd_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40668,15 +40689,15 @@ __ai __attribute__((target("neon"))) float32x2_t vrnd_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrndaq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndaq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrndaq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrndaq_v((int8x16_t)__rev0, 41); - 
__ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndaq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40684,15 +40705,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrndaq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrnda_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnda_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrnda_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrnda_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnda_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40700,15 +40721,15 @@ __ai __attribute__((target("neon"))) float32x2_t vrnda_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrndiq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndiq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrndiq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrndiq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndiq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40716,15 +40737,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrndiq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrndi_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndi_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrndi_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrndi_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndi_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40732,15 +40753,15 @@ __ai __attribute__((target("neon"))) float32x2_t vrndi_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) float32x4_t vrndmq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndmq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrndmq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrndmq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndmq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40748,15 +40769,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrndmq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrndm_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndm_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrndm_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrndm_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndm_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40764,15 +40785,15 @@ __ai __attribute__((target("neon"))) float32x2_t vrndm_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrndnq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndnq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrndnq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrndnq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndnq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40780,36 +40801,36 @@ __ai __attribute__((target("neon"))) float32x4_t vrndnq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrndn_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndn_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrndn_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - 
__ret = (float32x2_t) __builtin_neon_vrndn_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndn_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) float32_t vrndns_f32(float32_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vrndns_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vrndns_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrndpq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndpq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrndpq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrndpq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndpq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40817,15 +40838,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrndpq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrndp_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndp_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrndp_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrndp_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndp_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40833,15 +40854,15 @@ __ai __attribute__((target("neon"))) float32x2_t vrndp_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vrndxq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndxq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vrndxq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrndxq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrndxq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40849,15 +40870,15 @@ __ai __attribute__((target("neon"))) float32x4_t vrndxq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vrndx_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndx_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vrndx_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrndx_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrndx_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40867,16 +40888,16 @@ __ai __attribute__((target("neon"))) float32x2_t vrndx_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vmaxnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmaxnmq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vmaxnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmaxnmq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40884,16 +40905,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vmaxnmq_f16(float16x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vmaxnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmaxnm_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vmaxnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmaxnm_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40901,16 +40922,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vmaxnm_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vminnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vminnmq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vminnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vminnmq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -40918,16 +40939,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vminnmq_f16(float16x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vminnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vminnm_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vminnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vminnm_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -40935,16 +40956,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vminnm_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, 
__builtin_neon_vmaxnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmaxnmq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40952,16 +40973,16 @@ __ai __attribute__((target("neon"))) float32x4_t vmaxnmq_f32(float32x4_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmaxnm_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmaxnm_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -40969,16 +40990,16 @@ __ai __attribute__((target("neon"))) float32x2_t vmaxnm_f32(float32x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vminnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vminnmq_v(__builtin_bit_cast(int8x16_t, __rev0), 
__builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -40986,16 +41007,16 @@ __ai __attribute__((target("neon"))) float32x4_t vminnmq_f32(float32x4_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vminnm_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vminnm_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -41005,22 +41026,22 @@ __ai __attribute__((target("neon"))) float32x2_t vminnm_f32(float32x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) float32x4_t __noswap_vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 
41)); return __ret; } #endif @@ -41028,22 +41049,22 @@ __ai __attribute__((target("neon"))) float32x4_t __noswap_vfmaq_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) float32x2_t __noswap_vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -41057,10 +41078,10 @@ __ai __attribute__((target("neon"))) float32x4_t vfmaq_n_f32(float32x4_t __p0, f #else __ai __attribute__((target("neon"))) float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vfmaq_f32(__rev0, __rev1, (float32x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -41074,10 +41095,10 @@ __ai __attribute__((target("neon"))) float32x2_t vfma_n_f32(float32x2_t __p0, fl #else __ai __attribute__((target("neon"))) float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vfma_f32(__rev0, __rev1, (float32x2_t) {__p2, 
__p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -41091,11 +41112,11 @@ __ai __attribute__((target("neon"))) float32x4_t vfmsq_f32(float32x4_t __p0, flo #else __ai __attribute__((target("neon"))) float32x4_t vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vfmaq_f32(__rev0, -__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -41109,11 +41130,11 @@ __ai __attribute__((target("neon"))) float32x2_t vfms_f32(float32x2_t __p0, floa #else __ai __attribute__((target("neon"))) float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __noswap_vfma_f32(__rev0, -__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -41123,15 +41144,15 @@ __ai __attribute__((target("neon"))) float32x2_t vfms_f32(float32x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvt1_bf16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt1_bf16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvt1_bf16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt1_bf16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41139,15 +41160,15 @@ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_bf16_mf8_fpm(mfloat8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvt1_f16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt1_f16_mf8_fpm(__p0, 
__p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { float16x8_t __ret; - mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcvt1_f16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt1_f16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41155,15 +41176,15 @@ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_f16_mf8_fpm(mfloat8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvt1_high_bf16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt1_high_bf16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvt1_high_bf16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt1_high_bf16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41171,15 +41192,15 @@ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_high_bf16_mf8_fpm(mf #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvt1_high_f16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt1_high_f16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcvt1_high_f16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt1_high_f16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41187,15 +41208,15 @@ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_high_f16_mf8_fpm(mflo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvt1_low_bf16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt1_low_bf16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - mfloat8x16_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvt1_low_bf16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt1_low_bf16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41203,15 +41224,15 @@ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_low_bf16_mf8_fpm(mfl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvt1_low_f16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt1_low_f16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcvt1_low_f16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt1_low_f16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41219,15 +41240,15 @@ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_low_f16_mf8_fpm(mfloa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvt2_bf16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt2_bf16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvt2_bf16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt2_bf16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41235,15 +41256,15 @@ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_bf16_mf8_fpm(mfloat8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvt2_f16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt2_f16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { float16x8_t __ret; - mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcvt2_f16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x8_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt2_f16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41251,15 +41272,15 @@ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_f16_mf8_fpm(mfloat8x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvt2_high_bf16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt2_high_bf16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvt2_high_bf16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt2_high_bf16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41267,15 +41288,15 @@ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_high_bf16_mf8_fpm(mf #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvt2_high_f16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt2_high_f16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcvt2_high_f16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt2_high_f16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41283,15 +41304,15 @@ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_high_f16_mf8_fpm(mflo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvt2_low_bf16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvt2_low_bf16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { bfloat16x8_t __ret; - mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvt2_low_bf16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(bfloat16x8_t, 
__builtin_neon_vcvt2_low_bf16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41299,15 +41320,15 @@ __ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_low_bf16_mf8_fpm(mfl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vcvt2_low_f16_mf8_fpm(__p0, __p1); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt2_low_f16_mf8_fpm(__p0, __p1)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { float16x8_t __ret; - mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vcvt2_low_f16_mf8_fpm(__rev0, __p1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vcvt2_low_f16_mf8_fpm(__rev0, __p1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41315,17 +41336,17 @@ __ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_low_f16_mf8_fpm(mfloa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvt_high_mf8_f32_fpm(mfloat8x8_t __p0, float32x4_t __p1, float32x4_t __p2, fpm_t __p3) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t) __builtin_neon_vcvt_high_mf8_f32_fpm(__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vcvt_high_mf8_f32_fpm(__p0, __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvt_high_mf8_f32_fpm(mfloat8x8_t __p0, float32x4_t __p1, float32x4_t __p2, fpm_t __p3) { mfloat8x16_t __ret; - mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (mfloat8x16_t) __builtin_neon_vcvt_high_mf8_f32_fpm(__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vcvt_high_mf8_f32_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -41333,16 +41354,16 @@ __ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvt_high_mf8_f32_fpm(mflo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvtq_mf8_f16_fpm(float16x8_t __p0, float16x8_t __p1, fpm_t __p2) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t) __builtin_neon_vcvtq_mf8_f16_fpm((int8x16_t)__p0, (int8x16_t)__p1, __p2); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vcvtq_mf8_f16_fpm(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __p2)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvtq_mf8_f16_fpm(float16x8_t __p0, float16x8_t __p1, fpm_t __p2) { 
mfloat8x16_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (mfloat8x16_t) __builtin_neon_vcvtq_mf8_f16_fpm((int8x16_t)__rev0, (int8x16_t)__rev1, __p2); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vcvtq_mf8_f16_fpm(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -41350,16 +41371,16 @@ __ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvtq_mf8_f16_fpm(float16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f16_fpm(float16x4_t __p0, float16x4_t __p1, fpm_t __p2) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f16_fpm((int8x8_t)__p0, (int8x8_t)__p1, __p2); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vcvt_mf8_f16_fpm(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __p2)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f16_fpm(float16x4_t __p0, float16x4_t __p1, fpm_t __p2) { mfloat8x8_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f16_fpm((int8x8_t)__rev0, (int8x8_t)__rev1, __p2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vcvt_mf8_f16_fpm(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -41367,16 +41388,16 @@ __ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f16_fpm(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f32_fpm(float32x4_t __p0, float32x4_t __p1, fpm_t __p2) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f32_fpm(__p0, __p1, __p2); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vcvt_mf8_f32_fpm(__p0, __p1, __p2)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f32_fpm(float32x4_t __p0, float32x4_t __p1, fpm_t __p2) { mfloat8x8_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f32_fpm(__rev0, __rev1, __p2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vcvt_mf8_f32_fpm(__rev0, __rev1, __p2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); 
return __ret; } #endif @@ -41384,16 +41405,16 @@ __ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f32_fpm(float32x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float32x2_t vscale_f32(float32x2_t __p0, int32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vscale_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vscale_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float32x2_t vscale_f32(float32x2_t __p0, int32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vscale_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vscale_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -41401,16 +41422,16 @@ __ai __attribute__((target("fp8,neon"))) float32x2_t vscale_f32(float32x2_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x4_t vscale_f16(float16x4_t __p0, int16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vscale_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vscale_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x4_t vscale_f16(float16x4_t __p0, int16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vscale_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vscale_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -41418,16 +41439,16 @@ __ai __attribute__((target("fp8,neon"))) float16x4_t vscale_f16(float16x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float64x2_t vscaleq_f64(float64x2_t __p0, int64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vscaleq_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vscaleq_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float64x2_t vscaleq_f64(float64x2_t __p0, int64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) 
__builtin_neon_vscaleq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vscaleq_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -41435,16 +41456,16 @@ __ai __attribute__((target("fp8,neon"))) float64x2_t vscaleq_f64(float64x2_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float32x4_t vscaleq_f32(float32x4_t __p0, int32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vscaleq_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vscaleq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float32x4_t vscaleq_f32(float32x4_t __p0, int32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vscaleq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vscaleq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -41452,16 +41473,16 @@ __ai __attribute__((target("fp8,neon"))) float32x4_t vscaleq_f32(float32x4_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8,neon"))) float16x8_t vscaleq_f16(float16x8_t __p0, int16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vscaleq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vscaleq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fp8,neon"))) float16x8_t vscaleq_f16(float16x8_t __p0, int16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vscaleq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vscaleq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41469,17 +41490,17 @@ __ai __attribute__((target("fp8,neon"))) float16x8_t vscaleq_f16(float16x8_t __p #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8dot2,neon"))) float16x8_t 
vdotq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vdotq_f16_mf8_fpm((int8x16_t)__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vdotq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __p0), __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8dot2,neon"))) float16x8_t vdotq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vdotq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vdotq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41487,17 +41508,17 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x8_t vdotq_f16_mf8_fpm(float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float16x4_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vdot_f16_mf8_fpm((int8x8_t)__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vdot_f16_mf8_fpm(__builtin_bit_cast(int8x8_t, __p0), __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float16x4_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vdot_f16_mf8_fpm((int8x8_t)__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vdot_f16_mf8_fpm(__builtin_bit_cast(int8x8_t, __rev0), __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -41509,7 +41530,7 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x8_t) __builtin_neon_vdotq_lane_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vdotq_lane_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __s0), 
__s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41519,11 +41540,11 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vdotq_lane_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vdotq_lane_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -41535,7 +41556,7 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x8_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x4_t) __builtin_neon_vdot_lane_f16_mf8_fpm((int8x8_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vdot_lane_f16_mf8_fpm(__builtin_bit_cast(int8x8_t, __s0), __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41545,11 +41566,11 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x8_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vdot_lane_f16_mf8_fpm((int8x8_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vdot_lane_f16_mf8_fpm(__builtin_bit_cast(int8x8_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -41561,7 +41582,7 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x8_t) __builtin_neon_vdotq_laneq_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vdotq_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __s0), __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41571,11 +41592,11 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 
2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vdotq_laneq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vdotq_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -41587,7 +41608,7 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x8_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x4_t) __builtin_neon_vdot_laneq_f16_mf8_fpm((int8x8_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vdot_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x8_t, __s0), __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41597,11 +41618,11 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 mfloat8x8_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vdot_laneq_f16_mf8_fpm((int8x8_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vdot_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x8_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -41609,17 +41630,17 @@ __ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8dot4,neon"))) float32x4_t vdotq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vdotq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vdotq_f32_mf8_fpm(__p0, __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8dot4,neon"))) float32x4_t vdotq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vdotq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vdotq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -41627,17 +41648,17 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x4_t vdotq_f32_mf8_fpm(float #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float32x2_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vdot_f32_mf8_fpm(__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vdot_f32_mf8_fpm(__p0, __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float32x2_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vdot_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vdot_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -41649,7 +41670,7 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vdotq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vdotq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41659,11 +41680,11 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vdotq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = 
__builtin_bit_cast(float32x4_t, __builtin_neon_vdotq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -41675,7 +41696,7 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x8_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x2_t) __builtin_neon_vdot_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vdot_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41685,11 +41706,11 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x8_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vdot_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vdot_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -41701,7 +41722,7 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vdotq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vdotq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41711,11 +41732,11 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vdotq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vdotq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -41727,7 +41748,7 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x8_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x2_t) __builtin_neon_vdot_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); 
\ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vdot_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41737,11 +41758,11 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 mfloat8x8_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vdot_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vdot_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -41749,17 +41770,17 @@ __ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vmlalbq_f16_mf8_fpm((int8x16_t)__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlalbq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __p0), __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vmlalbq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlalbq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -41771,7 +41792,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(floa mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x8_t) __builtin_neon_vmlalbq_lane_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlalbq_lane_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __s0), __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41781,11 +41802,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(floa 
mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vmlalbq_lane_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlalbq_lane_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -41797,7 +41818,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(floa mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x8_t) __builtin_neon_vmlalbq_laneq_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlalbq_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __s0), __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41807,11 +41828,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(floa mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vmlalbq_laneq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlalbq_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -41819,17 +41840,17 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmlallbbq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbbq_f32_mf8_fpm(__p0, __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 
1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vmlallbbq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbbq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -41841,7 +41862,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlallbbq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbbq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41851,11 +41872,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlallbbq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbbq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -41867,7 +41888,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlallbbq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbbq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41877,11 +41898,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlallbbq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + 
float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbbq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -41889,17 +41910,17 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmlallbtq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbtq_f32_mf8_fpm(__p0, __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vmlallbtq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbtq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -41911,7 +41932,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlallbtq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbtq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41921,11 +41942,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlallbtq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float32x4_t, 
__builtin_neon_vmlallbtq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -41937,7 +41958,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlallbtq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbtq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41947,11 +41968,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlallbtq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallbtq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -41959,17 +41980,17 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmlalltbq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlalltbq_f32_mf8_fpm(__p0, __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vmlalltbq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlalltbq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -41981,7 +42002,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ 
mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlalltbq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlalltbq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -41991,11 +42012,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlalltbq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlalltbq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -42007,7 +42028,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlalltbq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlalltbq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -42017,11 +42038,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlalltbq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlalltbq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -42029,17 +42050,17 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmlallttq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallttq_f32_mf8_fpm(__p0, __p1, __p2, __p3)); return __ret; } #else __ai 
__attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vmlallttq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallttq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -42051,7 +42072,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlallttq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallttq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -42061,11 +42082,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlallttq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallttq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -42077,7 +42098,7 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float32x4_t) __builtin_neon_vmlallttq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallttq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -42087,11 +42108,11 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(fl mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, 
__s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vmlallttq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmlallttq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -42099,17 +42120,17 @@ __ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vmlaltq_f16_mf8_fpm((int8x16_t)__p0, __p1, __p2, __p3); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlaltq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __p0), __p1, __p2, __p3)); return __ret; } #else __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vmlaltq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlaltq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -42121,7 +42142,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x8_t) __builtin_neon_vmlaltq_lane_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlaltq_lane_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __s0), __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -42131,11 +42152,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa mfloat8x16_t __s1 = __p1; \ mfloat8x8_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vmlaltq_lane_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlaltq_lane_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42147,7 +42168,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - __ret = (float16x8_t) __builtin_neon_vmlaltq_laneq_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlaltq_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __s0), __s1, __s2, __p3, __s4)); \ __ret; \ }) #else @@ -42157,11 +42178,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa mfloat8x16_t __s1 = __p1; \ mfloat8x16_t __s2 = __p2; \ fpm_t __s4 = __p4; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vmlaltq_laneq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmlaltq_laneq_f16_mf8_fpm(__builtin_bit_cast(int8x16_t, __rev0), __rev1, __rev2, __p3, __s4)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42171,7 +42192,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vluti2_lane_p8((int8x8_t)__s0, (int8x8_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti2_lane_p8(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -42179,10 +42200,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vluti2_lane_p8((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x16_t, 
__builtin_neon_vluti2_lane_p8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42192,7 +42213,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vluti2q_lane_p8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti2q_lane_p8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -42200,10 +42221,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vluti2q_lane_p8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti2q_lane_p8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42213,7 +42234,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vluti2q_lane_u8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2q_lane_u8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -42221,10 +42242,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vluti2q_lane_u8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2q_lane_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42234,7 +42255,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vluti2q_lane_s8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, 
__builtin_neon_vluti2q_lane_s8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -42242,10 +42263,31 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vluti2q_lane_s8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti2q_lane_s8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2q_lane_mf8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vluti2q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2q_lane_mf8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42255,7 +42297,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vluti2_lane_u8((int8x8_t)__s0, (int8x8_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2_lane_u8(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -42263,10 +42305,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vluti2_lane_u8((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2_lane_u8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) 
#endif @@ -42276,7 +42318,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vluti2_lane_s8((int8x8_t)__s0, (int8x8_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti2_lane_s8(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -42284,10 +42326,31 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vluti2_lane_s8((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti2_lane_s8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2_lane_mf8(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vluti2_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2_lane_mf8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42297,7 +42360,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vluti2_lane_p16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2_lane_p16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -42305,10 +42368,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vluti2_lane_p16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = 
__builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2_lane_p16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42318,7 +42381,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vluti2q_lane_p16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2q_lane_p16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -42326,10 +42389,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vluti2q_lane_p16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2q_lane_p16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42339,7 +42402,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vluti2q_lane_u16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2q_lane_u16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -42347,10 +42410,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vluti2q_lane_u16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2q_lane_u16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42360,7 +42423,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vluti2q_lane_f16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2q_lane_f16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 
40)); \ __ret; \ }) #else @@ -42368,10 +42431,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vluti2q_lane_f16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2q_lane_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42381,7 +42444,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vluti2q_lane_s16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti2q_lane_s16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -42389,10 +42452,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vluti2q_lane_s16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti2q_lane_s16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42402,7 +42465,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vluti2_lane_u16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2_lane_u16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -42410,10 +42473,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vluti2_lane_u16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x8_t __rev1; __rev1 = 
__builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2_lane_u16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42423,7 +42486,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vluti2_lane_f16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2_lane_f16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 40)); \ __ret; \ }) #else @@ -42431,10 +42494,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vluti2_lane_f16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2_lane_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42444,7 +42507,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vluti2_lane_s16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti2_lane_s16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -42452,10 +42515,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vluti2_lane_s16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti2_lane_s16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42465,7 +42528,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vluti2_laneq_p8((int8x8_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti2_laneq_p8(__builtin_bit_cast(int8x8_t, __s0), 
__builtin_bit_cast(int8x16_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -42473,10 +42536,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vluti2_laneq_p8((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti2_laneq_p8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42486,7 +42549,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vluti2q_laneq_p8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti2q_laneq_p8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -42494,10 +42557,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vluti2q_laneq_p8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti2q_laneq_p8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42507,7 +42570,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vluti2q_laneq_u8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2q_laneq_u8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -42515,10 +42578,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) 
__builtin_neon_vluti2q_laneq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2q_laneq_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42528,7 +42591,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vluti2q_laneq_s8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti2q_laneq_s8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -42536,10 +42599,31 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vluti2q_laneq_s8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti2q_laneq_s8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2q_laneq_mf8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vluti2q_laneq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2q_laneq_mf8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42549,7 +42633,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vluti2_laneq_u8((int8x8_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2_laneq_u8(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) 
#else @@ -42557,10 +42641,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vluti2_laneq_u8((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti2_laneq_u8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42570,7 +42654,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vluti2_laneq_s8((int8x8_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti2_laneq_s8(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -42578,10 +42662,31 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vluti2_laneq_s8((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti2_laneq_s8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2_laneq_mf8(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vluti2_laneq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti2_laneq_mf8(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42591,7 +42696,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t 
vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vluti2_laneq_p16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2_laneq_p16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -42599,10 +42704,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vluti2_laneq_p16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2_laneq_p16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42612,7 +42717,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vluti2q_laneq_p16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2q_laneq_p16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -42620,10 +42725,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vluti2q_laneq_p16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti2q_laneq_p16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42633,7 +42738,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vluti2q_laneq_u16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2q_laneq_u16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -42641,10 +42746,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vluti2q_laneq_u16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2q_laneq_u16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42654,7 +42759,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vluti2q_laneq_f16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2q_laneq_f16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 40)); \ __ret; \ }) #else @@ -42662,10 +42767,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vluti2q_laneq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2q_laneq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42675,7 +42780,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vluti2q_laneq_s16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti2q_laneq_s16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -42683,10 +42788,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vluti2q_laneq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int16x8_t, 
__builtin_neon_vluti2q_laneq_s16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42696,7 +42801,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vluti2_laneq_u16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2_laneq_u16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -42704,10 +42809,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vluti2_laneq_u16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti2_laneq_u16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42717,7 +42822,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vluti2_laneq_f16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2_laneq_f16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 40)); \ __ret; \ }) #else @@ -42725,10 +42830,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vluti2_laneq_f16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti2_laneq_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42738,7 +42843,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vluti2_laneq_s16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti2_laneq_s16(__builtin_bit_cast(int8x8_t, __s0), 
__builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -42746,10 +42851,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vluti2_laneq_s16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti2_laneq_s16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42759,7 +42864,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vluti4q_lane_p8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti4q_lane_p8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -42767,10 +42872,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vluti4q_lane_p8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti4q_lane_p8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42780,7 +42885,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vluti4q_lane_u8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti4q_lane_u8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -42788,10 +42893,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vluti4q_lane_u8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti4q_lane_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42801,7 +42906,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vluti4q_lane_s8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti4q_lane_s8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -42809,10 +42914,31 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vluti4q_lane_s8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti4q_lane_s8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti4q_lane_mf8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vluti4q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti4q_lane_mf8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42822,7 +42948,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vluti4q_lane_p16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti4q_lane_p16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x8_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -42831,11 +42957,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t 
vmlaltq_f16_mf8_fpm(floa poly16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ poly16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vluti4q_lane_p16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti4q_lane_p16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42845,7 +42971,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vluti4q_lane_u16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti4q_lane_u16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x8_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -42854,11 +42980,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ uint16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vluti4q_lane_u16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti4q_lane_u16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42868,7 +42994,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vluti4q_lane_f16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti4q_lane_f16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x8_t, __s1), __p2, 40)); \ __ret; \ }) #else 
@@ -42877,11 +43003,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ float16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vluti4q_lane_f16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti4q_lane_f16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42891,7 +43017,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vluti4q_lane_s16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti4q_lane_s16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x8_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -42900,11 +43026,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ int16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vluti4q_lane_s16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti4q_lane_s16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -42914,7 +43040,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (poly8x16_t) __builtin_neon_vluti4q_laneq_p8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti4q_laneq_p8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 36)); \ __ret; \ }) #else @@ -42922,10 
+43048,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly8x16_t __ret; \ poly8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8x16_t) __builtin_neon_vluti4q_laneq_p8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vluti4q_laneq_p8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 36)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42935,7 +43061,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (uint8x16_t) __builtin_neon_vluti4q_laneq_u8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti4q_laneq_u8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 48)); \ __ret; \ }) #else @@ -42943,10 +43069,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint8x16_t __ret; \ uint8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8x16_t) __builtin_neon_vluti4q_laneq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vluti4q_laneq_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 48)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42956,7 +43082,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (int8x16_t) __builtin_neon_vluti4q_laneq_s8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti4q_laneq_s8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 32)); \ __ret; \ }) #else @@ -42964,10 +43090,31 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int8x16_t __ret; \ int8x16_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8x16_t) __builtin_neon_vluti4q_laneq_s8((int8x16_t)__rev0, 
(int8x16_t)__rev1, __p2, 32); \ - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vluti4q_laneq_s8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 32)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti4q_laneq_mf8(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vluti4q_laneq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vluti4q_laneq_mf8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -42977,7 +43124,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8_t __ret; \ poly16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (poly16x8_t) __builtin_neon_vluti4q_laneq_p16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 37); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti4q_laneq_p16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x16_t, __s1), __p2, 37)); \ __ret; \ }) #else @@ -42986,11 +43133,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa poly16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ poly16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16x8_t) __builtin_neon_vluti4q_laneq_p16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 37); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly16x8_t, __builtin_neon_vluti4q_laneq_p16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), __p2, 37)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43000,7 +43147,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8_t __ret; \ uint16x8x2_t __s0 = __p0; \ uint8x16_t 
__s1 = __p1; \ - __ret = (uint16x8_t) __builtin_neon_vluti4q_laneq_u16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 49); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti4q_laneq_u16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x16_t, __s1), __p2, 49)); \ __ret; \ }) #else @@ -43009,11 +43156,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa uint16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ uint16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16x8_t) __builtin_neon_vluti4q_laneq_u16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 49); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vluti4q_laneq_u16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), __p2, 49)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43023,7 +43170,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8_t __ret; \ float16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (float16x8_t) __builtin_neon_vluti4q_laneq_f16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti4q_laneq_f16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x16_t, __s1), __p2, 40)); \ __ret; \ }) #else @@ -43032,11 +43179,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa float16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ float16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vluti4q_laneq_f16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vluti4q_laneq_f16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), __p2, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) 
#endif @@ -43046,7 +43193,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8_t __ret; \ int16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vluti4q_laneq_s16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti4q_laneq_s16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -43055,11 +43202,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa int16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ int16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vluti4q_laneq_s16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vluti4q_laneq_s16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43069,7 +43216,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vluti2q_lane_bf16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2q_lane_bf16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 43)); \ __ret; \ }) #else @@ -43077,10 +43224,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vluti2q_lane_bf16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2q_lane_bf16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43090,7 +43237,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (bfloat16x8_t) 
__builtin_neon_vluti2_lane_bf16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2_lane_bf16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 43)); \ __ret; \ }) #else @@ -43098,10 +43245,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vluti2_lane_bf16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2_lane_bf16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43111,7 +43258,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vluti2q_laneq_bf16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2q_laneq_bf16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 43)); \ __ret; \ }) #else @@ -43119,10 +43266,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x8_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vluti2q_laneq_bf16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2q_laneq_bf16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43132,7 +43279,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vluti2_laneq_bf16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2_laneq_bf16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 43)); \ __ret; \ }) #else @@ -43140,10 +43287,10 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x4_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vluti2_laneq_bf16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti2_laneq_bf16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43153,7 +43300,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vluti4q_lane_bf16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti4q_lane_bf16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x8_t, __s1), __p2, 43)); \ __ret; \ }) #else @@ -43162,11 +43309,11 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8x2_t __s0 = __p0; \ uint8x8_t __s1 = __p1; \ bfloat16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vluti4q_lane_bf16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti4q_lane_bf16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), __p2, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -43176,7 +43323,7 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8_t __ret; \ bfloat16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ - __ret = (bfloat16x8_t) __builtin_neon_vluti4q_laneq_bf16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 43); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti4q_laneq_bf16_x2(__builtin_bit_cast(int8x16_t, __s0.val[0]), __builtin_bit_cast(int8x16_t, __s0.val[1]), __builtin_bit_cast(int8x16_t, __s1), __p2, 43)); \ __ret; \ }) #else @@ -43185,28 +43332,821 @@ __ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(floa bfloat16x8x2_t __s0 = __p0; \ uint8x16_t __s1 = __p1; \ bfloat16x8x2_t __rev0; \ - __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 
1, 0); \ - __ret = (bfloat16x8_t) __builtin_neon_vluti4q_laneq_bf16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 43); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], __lane_reverse_128_16); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], __lane_reverse_128_16); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vluti4q_laneq_bf16_x2(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), __p2, 43)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define splatq_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 12)); \ + __ret; \ +}) +#else +#define splatq_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 12)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#define __noswap_splatq_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_splatq_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 12)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define splat_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 12)); \ + __ret; \ +}) +#else +#define splat_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __p1, 12)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ + __ret; \ +}) +#define __noswap_splat_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_splat_lane_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 12)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define splatq_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 44)); \ + __ret; \ +}) +#else +#define splatq_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#define __noswap_splatq_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + __ret 
= __builtin_bit_cast(mfloat8x16_t, __builtin_neon_splatq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 44)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define splat_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 44)); \ + __ret; \ +}) +#else +#define splat_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ + __ret; \ +}) +#define __noswap_splat_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_splat_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 44)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vbslq_mf8(uint8x16_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vbslq_mf8(uint8x16_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2) { + mfloat8x16_t __ret; + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vbsl_mf8(uint8x8_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vbsl_mf8(uint8x8_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vcombine_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t 
vcombine_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x16_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#define vcreate_mf8(__p0) __extension__ ({ \ + mfloat8x8_t __ret; \ + uint64_t __promote = __p0; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __promote); \ + __ret; \ +}) +#ifdef __LITTLE_ENDIAN__ +#define vdupq_lane_mf8(__p0_230, __p1_230) __extension__ ({ \ + mfloat8x16_t __ret_230; \ + mfloat8x8_t __s0_230 = __p0_230; \ + __ret_230 = splatq_lane_mf8(__s0_230, __p1_230); \ + __ret_230; \ +}) +#else +#define vdupq_lane_mf8(__p0_231, __p1_231) __extension__ ({ \ + mfloat8x16_t __ret_231; \ + mfloat8x8_t __s0_231 = __p0_231; \ + mfloat8x8_t __rev0_231; __rev0_231 = __builtin_shufflevector(__s0_231, __s0_231, __lane_reverse_64_8); \ + __ret_231 = __noswap_splatq_lane_mf8(__rev0_231, __p1_231); \ + __ret_231 = __builtin_shufflevector(__ret_231, __ret_231, __lane_reverse_128_8); \ + __ret_231; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_lane_mf8(__p0_232, __p1_232) __extension__ ({ \ + mfloat8x8_t __ret_232; \ + mfloat8x8_t __s0_232 = __p0_232; \ + __ret_232 = splat_lane_mf8(__s0_232, __p1_232); \ + __ret_232; \ +}) +#else +#define vdup_lane_mf8(__p0_233, __p1_233) __extension__ ({ \ + mfloat8x8_t __ret_233; \ + mfloat8x8_t __s0_233 = __p0_233; \ + mfloat8x8_t __rev0_233; __rev0_233 = __builtin_shufflevector(__s0_233, __s0_233, __lane_reverse_64_8); \ + __ret_233 = __noswap_splat_lane_mf8(__rev0_233, __p1_233); \ + __ret_233 = __builtin_shufflevector(__ret_233, __ret_233, __lane_reverse_64_8); \ + __ret_233; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vdupq_n_mf8(mfloat8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vdupq_n_mf8(mfloat8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vdup_n_mf8(mfloat8_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vdup_n_mf8(mfloat8_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vextq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vextq_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vext_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 12)); \ + __ret; \ +}) +#else +#define vext_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 12)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vget_high_mf8(mfloat8x16_t __p0) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vget_high_mf8(mfloat8x16_t __p0) { + mfloat8x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 8, 9, 10, 11, 12, 13, 14, 15); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vgetq_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vgetq_lane_mf8(__s0, __p1)); \ + __ret; \ +}) +#else +#define vgetq_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vgetq_lane_mf8(__rev0, __p1)); \ + __ret; \ +}) +#define __noswap_vgetq_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vgetq_lane_mf8(__s0, __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vget_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vget_lane_mf8(__s0, __p1)); \ + __ret; \ +}) +#else +#define vget_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vget_lane_mf8(__rev0, __p1)); \ + __ret; \ +}) +#define __noswap_vget_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vget_lane_mf8(__s0, __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vget_low_mf8(mfloat8x16_t 
__p0) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vget_low_mf8(mfloat8x16_t __p0) { + mfloat8x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 0, 1, 2, 3, 4, 5, 6, 7); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vmovq_n_mf8(mfloat8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vmovq_n_mf8(mfloat8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vmov_n_mf8(mfloat8_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vmov_n_mf8(mfloat8_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vrev16q_mf8(mfloat8x16_t __p0) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vrev16q_mf8(mfloat8x16_t __p0) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vrev16_mf8(mfloat8x8_t __p0) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vrev16_mf8(mfloat8x8_t __p0) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 1, 0, 3, 2, 5, 4, 7, 6); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vrev32q_mf8(mfloat8x16_t __p0) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vrev32q_mf8(mfloat8x16_t __p0) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai 
__attribute__((target("neon"))) mfloat8x8_t vrev32_mf8(mfloat8x8_t __p0) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vrev32_mf8(mfloat8x8_t __p0) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 3, 2, 1, 0, 7, 6, 5, 4); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vrev64q_mf8(mfloat8x16_t __p0) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vrev64q_mf8(mfloat8x16_t __p0) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vrev64_mf8(mfloat8x8_t __p0) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vrev64_mf8(mfloat8x8_t __p0) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev0, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vsetq_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vsetq_lane_mf8(__s0, __s1, __p2)); \ + __ret; \ +}) +#else +#define vsetq_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vsetq_lane_mf8(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ + __ret; \ +}) +#define __noswap_vsetq_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vsetq_lane_mf8(__s0, __s1, __p2)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vset_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vset_lane_mf8(__s0, __s1, __p2)); \ + __ret; \ +}) +#else +#define vset_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vset_lane_mf8(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ + __ret; \ +}) +#define __noswap_vset_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + 
mfloat8x8_t __ret; \ + mfloat8_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vset_lane_mf8(__s0, __s1, __p2)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl2_mf8(mfloat8x8x2_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl2_mf8(mfloat8x8x2_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl2_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl3_mf8(mfloat8x8x3_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl3_mf8(mfloat8x8x3_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl3_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl4_mf8(mfloat8x8x4_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = 
__builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __p0.val[0]), __builtin_bit_cast(int8x8_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbl4_mf8(mfloat8x8x4_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_64_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_64_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_64_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbl4_v(__builtin_bit_cast(int8x8_t, __rev0.val[0]), __builtin_bit_cast(int8x8_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx2_mf8(mfloat8x8_t __p0, mfloat8x8x2_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx2_mf8(mfloat8x8_t __p0, mfloat8x8x2_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, 
__ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx3_mf8(mfloat8x8_t __p0, mfloat8x8x3_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx3_mf8(mfloat8x8_t __p0, mfloat8x8x3_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx4_mf8(mfloat8x8_t __p0, mfloat8x8x4_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1.val[0]), __builtin_bit_cast(int8x8_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtbx4_mf8(mfloat8x8_t __p0, mfloat8x8x4_t __p1, mfloat8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_64_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_64_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_64_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_64_8); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16x2_t vtrnq_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16x2_t __ret; + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 44); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16x2_t vtrnq_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + 
mfloat8x16x2_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vtrnq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 44); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8x2_t vtrn_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8x2_t __ret; + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 12); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8x2_t vtrn_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8x2_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vtrn_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 12); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16x2_t vuzpq_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16x2_t __ret; + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 44); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16x2_t vuzpq_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16x2_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vuzpq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 44); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8x2_t vuzp_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8x2_t __ret; + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 12); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8x2_t vuzp_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8x2_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vuzp_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 12); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16x2_t vzipq_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16x2_t __ret; + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 44); + 
return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16x2_t vzipq_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16x2_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __builtin_neon_vzipq_v(&__ret, __builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 44); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8x2_t vzip_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8x2_t __ret; + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 12); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8x2_t vzip_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8x2_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __builtin_neon_vzip_v(&__ret, __builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 12); + + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); + return __ret; +} +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float64x2_t vamaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vamaxq_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vamaxq_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float64x2_t vamaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vamaxq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vamaxq_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -43214,16 +44154,16 @@ __ai __attribute__((target("neon,faminmax"))) float64x2_t vamaxq_f64(float64x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float32x4_t vamaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vamaxq_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vamaxq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float32x4_t vamaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 
0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vamaxq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vamaxq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -43231,16 +44171,16 @@ __ai __attribute__((target("neon,faminmax"))) float32x4_t vamaxq_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float16x8_t vamaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vamaxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vamaxq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float16x8_t vamaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vamaxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vamaxq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -43248,16 +44188,16 @@ __ai __attribute__((target("neon,faminmax"))) float16x8_t vamaxq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float32x2_t vamax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vamax_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vamax_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float32x2_t vamax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vamax_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vamax_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -43265,16 +44205,16 @@ __ai __attribute__((target("neon,faminmax"))) float32x2_t vamax_f32(float32x2_t #ifdef 
__LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float16x4_t vamax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vamax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vamax_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float16x4_t vamax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vamax_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vamax_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -43282,16 +44222,16 @@ __ai __attribute__((target("neon,faminmax"))) float16x4_t vamax_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float64x2_t vaminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vaminq_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vaminq_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float64x2_t vaminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vaminq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vaminq_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -43299,16 +44239,16 @@ __ai __attribute__((target("neon,faminmax"))) float64x2_t vaminq_f64(float64x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float32x4_t vaminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vaminq_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vaminq_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float32x4_t vaminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vaminq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, 
__ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vaminq_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -43316,16 +44256,16 @@ __ai __attribute__((target("neon,faminmax"))) float32x4_t vaminq_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float16x8_t vaminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vaminq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vaminq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float16x8_t vaminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vaminq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vaminq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -43333,16 +44273,16 @@ __ai __attribute__((target("neon,faminmax"))) float16x8_t vaminq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float32x2_t vamin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vamin_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vamin_f32(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float32x2_t vamin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vamin_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vamin_f32(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -43350,16 +44290,16 @@ __ai __attribute__((target("neon,faminmax"))) float32x2_t vamin_f32(float32x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon,faminmax"))) float16x4_t vamin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vamin_f16((int8x8_t)__p0, (int8x8_t)__p1, 
8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vamin_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("neon,faminmax"))) float16x4_t vamin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vamin_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vamin_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -43368,105 +44308,105 @@ __ai __attribute__((target("neon,faminmax"))) float16x4_t vamin_f16(float16x4_t #if defined(__aarch64__) || defined(__arm64ec__) __ai __attribute__((target("aes,neon"))) poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); + __ret = __builtin_bit_cast(poly128_t, __builtin_neon_vmull_p64(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_bf16(__p0_230, __p1_230, __p2_230, __p3_230) __extension__ ({ \ - bfloat16x8_t __ret_230; \ - bfloat16x8_t __s0_230 = __p0_230; \ - bfloat16x4_t __s2_230 = __p2_230; \ - __ret_230 = vsetq_lane_bf16(vget_lane_bf16(__s2_230, __p3_230), __s0_230, __p1_230); \ - __ret_230; \ -}) -#else -#define vcopyq_lane_bf16(__p0_231, __p1_231, __p2_231, __p3_231) __extension__ ({ \ - bfloat16x8_t __ret_231; \ - bfloat16x8_t __s0_231 = __p0_231; \ - bfloat16x4_t __s2_231 = __p2_231; \ - bfloat16x8_t __rev0_231; __rev0_231 = __builtin_shufflevector(__s0_231, __s0_231, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_231; __rev2_231 = __builtin_shufflevector(__s2_231, __s2_231, 3, 2, 1, 0); \ - __ret_231 = __noswap_vsetq_lane_bf16(__noswap_vget_lane_bf16(__rev2_231, __p3_231), __rev0_231, __p1_231); \ - __ret_231 = __builtin_shufflevector(__ret_231, __ret_231, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_231; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_bf16(__p0_232, __p1_232, __p2_232, __p3_232) __extension__ ({ \ - bfloat16x4_t __ret_232; \ - bfloat16x4_t __s0_232 = __p0_232; \ - bfloat16x4_t __s2_232 = __p2_232; \ - __ret_232 = vset_lane_bf16(vget_lane_bf16(__s2_232, __p3_232), __s0_232, __p1_232); \ - __ret_232; \ -}) -#else -#define vcopy_lane_bf16(__p0_233, __p1_233, __p2_233, __p3_233) __extension__ ({ \ - bfloat16x4_t __ret_233; \ - bfloat16x4_t __s0_233 = __p0_233; \ - bfloat16x4_t __s2_233 = __p2_233; \ - bfloat16x4_t __rev0_233; __rev0_233 = __builtin_shufflevector(__s0_233, __s0_233, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_233; __rev2_233 = __builtin_shufflevector(__s2_233, __s2_233, 3, 2, 1, 0); \ - __ret_233 = __noswap_vset_lane_bf16(__noswap_vget_lane_bf16(__rev2_233, __p3_233), __rev0_233, __p1_233); \ - __ret_233 = __builtin_shufflevector(__ret_233, __ret_233, 3, 2, 1, 0); \ - __ret_233; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_bf16(__p0_234, __p1_234, __p2_234, __p3_234) __extension__ ({ \ +#define vcopyq_lane_bf16(__p0_234, __p1_234, __p2_234, __p3_234) __extension__ ({ \ bfloat16x8_t __ret_234; \ 
bfloat16x8_t __s0_234 = __p0_234; \ - bfloat16x8_t __s2_234 = __p2_234; \ - __ret_234 = vsetq_lane_bf16(vgetq_lane_bf16(__s2_234, __p3_234), __s0_234, __p1_234); \ + bfloat16x4_t __s2_234 = __p2_234; \ + __ret_234 = vsetq_lane_bf16(vget_lane_bf16(__s2_234, __p3_234), __s0_234, __p1_234); \ __ret_234; \ }) #else -#define vcopyq_laneq_bf16(__p0_235, __p1_235, __p2_235, __p3_235) __extension__ ({ \ +#define vcopyq_lane_bf16(__p0_235, __p1_235, __p2_235, __p3_235) __extension__ ({ \ bfloat16x8_t __ret_235; \ bfloat16x8_t __s0_235 = __p0_235; \ - bfloat16x8_t __s2_235 = __p2_235; \ - bfloat16x8_t __rev0_235; __rev0_235 = __builtin_shufflevector(__s0_235, __s0_235, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_235; __rev2_235 = __builtin_shufflevector(__s2_235, __s2_235, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_235 = __noswap_vsetq_lane_bf16(__noswap_vgetq_lane_bf16(__rev2_235, __p3_235), __rev0_235, __p1_235); \ - __ret_235 = __builtin_shufflevector(__ret_235, __ret_235, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __s2_235 = __p2_235; \ + bfloat16x8_t __rev0_235; __rev0_235 = __builtin_shufflevector(__s0_235, __s0_235, __lane_reverse_128_16); \ + bfloat16x4_t __rev2_235; __rev2_235 = __builtin_shufflevector(__s2_235, __s2_235, __lane_reverse_64_16); \ + __ret_235 = __noswap_vsetq_lane_bf16(__noswap_vget_lane_bf16(__rev2_235, __p3_235), __rev0_235, __p1_235); \ + __ret_235 = __builtin_shufflevector(__ret_235, __ret_235, __lane_reverse_128_16); \ __ret_235; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_bf16(__p0_236, __p1_236, __p2_236, __p3_236) __extension__ ({ \ +#define vcopy_lane_bf16(__p0_236, __p1_236, __p2_236, __p3_236) __extension__ ({ \ bfloat16x4_t __ret_236; \ bfloat16x4_t __s0_236 = __p0_236; \ - bfloat16x8_t __s2_236 = __p2_236; \ - __ret_236 = vset_lane_bf16(vgetq_lane_bf16(__s2_236, __p3_236), __s0_236, __p1_236); \ + bfloat16x4_t __s2_236 = __p2_236; \ + __ret_236 = vset_lane_bf16(vget_lane_bf16(__s2_236, __p3_236), __s0_236, __p1_236); \ __ret_236; \ }) #else -#define vcopy_laneq_bf16(__p0_237, __p1_237, __p2_237, __p3_237) __extension__ ({ \ +#define vcopy_lane_bf16(__p0_237, __p1_237, __p2_237, __p3_237) __extension__ ({ \ bfloat16x4_t __ret_237; \ bfloat16x4_t __s0_237 = __p0_237; \ - bfloat16x8_t __s2_237 = __p2_237; \ - bfloat16x4_t __rev0_237; __rev0_237 = __builtin_shufflevector(__s0_237, __s0_237, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_237; __rev2_237 = __builtin_shufflevector(__s2_237, __s2_237, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_237 = __noswap_vset_lane_bf16(__noswap_vgetq_lane_bf16(__rev2_237, __p3_237), __rev0_237, __p1_237); \ - __ret_237 = __builtin_shufflevector(__ret_237, __ret_237, 3, 2, 1, 0); \ + bfloat16x4_t __s2_237 = __p2_237; \ + bfloat16x4_t __rev0_237; __rev0_237 = __builtin_shufflevector(__s0_237, __s0_237, __lane_reverse_64_16); \ + bfloat16x4_t __rev2_237; __rev2_237 = __builtin_shufflevector(__s2_237, __s2_237, __lane_reverse_64_16); \ + __ret_237 = __noswap_vset_lane_bf16(__noswap_vget_lane_bf16(__rev2_237, __p3_237), __rev0_237, __p1_237); \ + __ret_237 = __builtin_shufflevector(__ret_237, __ret_237, __lane_reverse_64_16); \ __ret_237; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vcopyq_laneq_bf16(__p0_238, __p1_238, __p2_238, __p3_238) __extension__ ({ \ + bfloat16x8_t __ret_238; \ + bfloat16x8_t __s0_238 = __p0_238; \ + bfloat16x8_t __s2_238 = __p2_238; \ + __ret_238 = vsetq_lane_bf16(vgetq_lane_bf16(__s2_238, __p3_238), __s0_238, __p1_238); \ + __ret_238; \ +}) +#else +#define vcopyq_laneq_bf16(__p0_239, __p1_239, __p2_239, __p3_239) 
__extension__ ({ \ + bfloat16x8_t __ret_239; \ + bfloat16x8_t __s0_239 = __p0_239; \ + bfloat16x8_t __s2_239 = __p2_239; \ + bfloat16x8_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, __lane_reverse_128_16); \ + bfloat16x8_t __rev2_239; __rev2_239 = __builtin_shufflevector(__s2_239, __s2_239, __lane_reverse_128_16); \ + __ret_239 = __noswap_vsetq_lane_bf16(__noswap_vgetq_lane_bf16(__rev2_239, __p3_239), __rev0_239, __p1_239); \ + __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, __lane_reverse_128_16); \ + __ret_239; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_laneq_bf16(__p0_240, __p1_240, __p2_240, __p3_240) __extension__ ({ \ + bfloat16x4_t __ret_240; \ + bfloat16x4_t __s0_240 = __p0_240; \ + bfloat16x8_t __s2_240 = __p2_240; \ + __ret_240 = vset_lane_bf16(vgetq_lane_bf16(__s2_240, __p3_240), __s0_240, __p1_240); \ + __ret_240; \ +}) +#else +#define vcopy_laneq_bf16(__p0_241, __p1_241, __p2_241, __p3_241) __extension__ ({ \ + bfloat16x4_t __ret_241; \ + bfloat16x4_t __s0_241 = __p0_241; \ + bfloat16x8_t __s2_241 = __p2_241; \ + bfloat16x4_t __rev0_241; __rev0_241 = __builtin_shufflevector(__s0_241, __s0_241, __lane_reverse_64_16); \ + bfloat16x8_t __rev2_241; __rev2_241 = __builtin_shufflevector(__s2_241, __s2_241, __lane_reverse_128_16); \ + __ret_241 = __noswap_vset_lane_bf16(__noswap_vgetq_lane_bf16(__rev2_241, __p3_241), __rev0_241, __p1_241); \ + __ret_241 = __builtin_shufflevector(__ret_241, __ret_241, __lane_reverse_64_16); \ + __ret_241; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t) __builtin_neon_vcvt_bf16_f32((int8x16_t)__p0, 11); + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vcvt_bf16_f32(__builtin_bit_cast(int8x16_t, __p0), 11)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (bfloat16x4_t) __builtin_neon_vcvt_bf16_f32((int8x16_t)__rev0, 11); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(bfloat16x4_t, __builtin_neon_vcvt_bf16_f32(__builtin_bit_cast(int8x16_t, __rev0), 11)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -43474,16 +44414,16 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvtq_high_bf16_f32((int8x16_t)__p0, (int8x16_t)__p1, 43); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvtq_high_bf16_f32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 43)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvtq_high_bf16_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 43); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + bfloat16x8_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvtq_high_bf16_f32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 43)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -43491,424 +44431,424 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon_vcvtq_low_bf16_f32((int8x16_t)__p0, 43); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvtq_low_bf16_f32(__builtin_bit_cast(int8x16_t, __p0), 43)); return __ret; } #else __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon_vcvtq_low_bf16_f32((int8x16_t)__rev0, 43); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(bfloat16x8_t, __builtin_neon_vcvtq_low_bf16_f32(__builtin_bit_cast(int8x16_t, __rev0), 43)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif __ai __attribute__((target("bf16,neon"))) poly8x8_t vreinterpret_p8_bf16(bfloat16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly64x1_t vreinterpret_p64_bf16(bfloat16x4_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly16x4_t vreinterpret_p16_bf16(bfloat16x4_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly8x16_t vreinterpretq_p8_bf16(bfloat16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly128_t vreinterpretq_p128_bf16(bfloat16x8_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly64x2_t vreinterpretq_p64_bf16(bfloat16x8_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) poly16x8_t vreinterpretq_p16_bf16(bfloat16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint8x16_t vreinterpretq_u8_bf16(bfloat16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint32x4_t vreinterpretq_u32_bf16(bfloat16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint64x2_t vreinterpretq_u64_bf16(bfloat16x8_t __p0) { uint64x2_t __ret; - __ret = 
(uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint16x8_t vreinterpretq_u16_bf16(bfloat16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int8x16_t vreinterpretq_s8_bf16(bfloat16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float64x2_t vreinterpretq_f64_bf16(bfloat16x8_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x4_t vreinterpretq_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float16x8_t vreinterpretq_f16_bf16(bfloat16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int32x4_t vreinterpretq_s32_bf16(bfloat16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int64x2_t vreinterpretq_s64_bf16(bfloat16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int16x8_t vreinterpretq_s16_bf16(bfloat16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint8x8_t vreinterpret_u8_bf16(bfloat16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint32x2_t vreinterpret_u32_bf16(bfloat16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint64x1_t vreinterpret_u64_bf16(bfloat16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) uint16x4_t vreinterpret_u16_bf16(bfloat16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int8x8_t vreinterpret_s8_bf16(bfloat16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float64x1_t vreinterpret_f64_bf16(bfloat16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float32x2_t vreinterpret_f32_bf16(bfloat16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) float16x4_t vreinterpret_f16_bf16(bfloat16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int32x2_t vreinterpret_s32_bf16(bfloat16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); 
+ __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int64x1_t vreinterpret_s64_bf16(bfloat16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) int16x4_t vreinterpret_s16_bf16(bfloat16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_p8(poly8x16_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_p128(poly128_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_p64(poly64x2_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_p16(poly16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u8(uint8x16_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u32(uint32x4_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u64(uint64x2_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_u16(uint16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_s8(int8x16_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_f64(float64x2_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_f16(float16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_s32(int32x4_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vreinterpretq_bf16_s64(int64x2_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x8_t 
vreinterpretq_bf16_s16(int16x8_t __p0) { bfloat16x8_t __ret; - __ret = (bfloat16x8_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x8_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_p8(poly8x8_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_p64(poly64x1_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_p16(poly16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u8(uint8x8_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u32(uint32x2_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u64(uint64x1_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_u16(uint16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s8(int8x8_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_f64(float64x1_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_f32(float32x2_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_f16(float16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s32(int32x2_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s64(int64x1_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int16x4_t __p0) { bfloat16x4_t __ret; - __ret = (bfloat16x4_t)(__p0); + __ret = __builtin_bit_cast(bfloat16x4_t, __p0); return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vdotq_laneq_u32(__p0_238, __p1_238, __p2_238, __p3_238) __extension__ ({ \ - uint32x4_t __ret_238; \ - uint32x4_t __s0_238 = __p0_238; \ - uint8x16_t __s1_238 = __p1_238; \ - uint8x16_t __s2_238 = __p2_238; \ - __ret_238 = vdotq_u32(__s0_238, __s1_238, __builtin_bit_cast(uint8x16_t, splatq_laneq_u32(__builtin_bit_cast(uint32x4_t, __s2_238), __p3_238))); \ - __ret_238; \ 
-}) -#else -#define vdotq_laneq_u32(__p0_239, __p1_239, __p2_239, __p3_239) __extension__ ({ \ - uint32x4_t __ret_239; \ - uint32x4_t __s0_239 = __p0_239; \ - uint8x16_t __s1_239 = __p1_239; \ - uint8x16_t __s2_239 = __p2_239; \ - uint32x4_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, 3, 2, 1, 0); \ - uint8x16_t __rev1_239; __rev1_239 = __builtin_shufflevector(__s1_239, __s1_239, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_239; __rev2_239 = __builtin_shufflevector(__s2_239, __s2_239, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_239 = __noswap_vdotq_u32(__rev0_239, __rev1_239, __builtin_bit_cast(uint8x16_t, __noswap_splatq_laneq_u32(__builtin_bit_cast(uint32x4_t, __rev2_239), __p3_239))); \ - __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, 3, 2, 1, 0); \ - __ret_239; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdotq_laneq_s32(__p0_240, __p1_240, __p2_240, __p3_240) __extension__ ({ \ - int32x4_t __ret_240; \ - int32x4_t __s0_240 = __p0_240; \ - int8x16_t __s1_240 = __p1_240; \ - int8x16_t __s2_240 = __p2_240; \ - __ret_240 = vdotq_s32(__s0_240, __s1_240, __builtin_bit_cast(int8x16_t, splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_240), __p3_240))); \ - __ret_240; \ -}) -#else -#define vdotq_laneq_s32(__p0_241, __p1_241, __p2_241, __p3_241) __extension__ ({ \ - int32x4_t __ret_241; \ - int32x4_t __s0_241 = __p0_241; \ - int8x16_t __s1_241 = __p1_241; \ - int8x16_t __s2_241 = __p2_241; \ - int32x4_t __rev0_241; __rev0_241 = __builtin_shufflevector(__s0_241, __s0_241, 3, 2, 1, 0); \ - int8x16_t __rev1_241; __rev1_241 = __builtin_shufflevector(__s1_241, __s1_241, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_241; __rev2_241 = __builtin_shufflevector(__s2_241, __s2_241, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_241 = __noswap_vdotq_s32(__rev0_241, __rev1_241, __builtin_bit_cast(int8x16_t, __noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_241), __p3_241))); \ - __ret_241 = __builtin_shufflevector(__ret_241, __ret_241, 3, 2, 1, 0); \ - __ret_241; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdot_laneq_u32(__p0_242, __p1_242, __p2_242, __p3_242) __extension__ ({ \ - uint32x2_t __ret_242; \ - uint32x2_t __s0_242 = __p0_242; \ - uint8x8_t __s1_242 = __p1_242; \ +#define vdotq_laneq_u32(__p0_242, __p1_242, __p2_242, __p3_242) __extension__ ({ \ + uint32x4_t __ret_242; \ + uint32x4_t __s0_242 = __p0_242; \ + uint8x16_t __s1_242 = __p1_242; \ uint8x16_t __s2_242 = __p2_242; \ - __ret_242 = vdot_u32(__s0_242, __s1_242, __builtin_bit_cast(uint8x8_t, splat_laneq_u32(__builtin_bit_cast(uint32x4_t, __s2_242), __p3_242))); \ + __ret_242 = vdotq_u32(__s0_242, __s1_242, __builtin_bit_cast(uint8x16_t, splatq_laneq_u32(__builtin_bit_cast(uint32x4_t, __s2_242), __p3_242))); \ __ret_242; \ }) #else -#define vdot_laneq_u32(__p0_243, __p1_243, __p2_243, __p3_243) __extension__ ({ \ - uint32x2_t __ret_243; \ - uint32x2_t __s0_243 = __p0_243; \ - uint8x8_t __s1_243 = __p1_243; \ +#define vdotq_laneq_u32(__p0_243, __p1_243, __p2_243, __p3_243) __extension__ ({ \ + uint32x4_t __ret_243; \ + uint32x4_t __s0_243 = __p0_243; \ + uint8x16_t __s1_243 = __p1_243; \ uint8x16_t __s2_243 = __p2_243; \ - uint32x2_t __rev0_243; __rev0_243 = __builtin_shufflevector(__s0_243, __s0_243, 1, 0); \ - uint8x8_t __rev1_243; __rev1_243 = __builtin_shufflevector(__s1_243, __s1_243, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_243; __rev2_243 = 
__builtin_shufflevector(__s2_243, __s2_243, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_243 = __noswap_vdot_u32(__rev0_243, __rev1_243, __builtin_bit_cast(uint8x8_t, __noswap_splat_laneq_u32(__builtin_bit_cast(uint32x4_t, __rev2_243), __p3_243))); \ - __ret_243 = __builtin_shufflevector(__ret_243, __ret_243, 1, 0); \ + uint32x4_t __rev0_243; __rev0_243 = __builtin_shufflevector(__s0_243, __s0_243, __lane_reverse_128_32); \ + uint8x16_t __rev1_243; __rev1_243 = __builtin_shufflevector(__s1_243, __s1_243, __lane_reverse_128_8); \ + uint8x16_t __rev2_243; __rev2_243 = __builtin_shufflevector(__s2_243, __s2_243, __lane_reverse_128_8); \ + __ret_243 = __noswap_vdotq_u32(__rev0_243, __rev1_243, __builtin_bit_cast(uint8x16_t, __noswap_splatq_laneq_u32(__builtin_bit_cast(uint32x4_t, __rev2_243), __p3_243))); \ + __ret_243 = __builtin_shufflevector(__ret_243, __ret_243, __lane_reverse_128_32); \ __ret_243; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdot_laneq_s32(__p0_244, __p1_244, __p2_244, __p3_244) __extension__ ({ \ - int32x2_t __ret_244; \ - int32x2_t __s0_244 = __p0_244; \ - int8x8_t __s1_244 = __p1_244; \ +#define vdotq_laneq_s32(__p0_244, __p1_244, __p2_244, __p3_244) __extension__ ({ \ + int32x4_t __ret_244; \ + int32x4_t __s0_244 = __p0_244; \ + int8x16_t __s1_244 = __p1_244; \ int8x16_t __s2_244 = __p2_244; \ - __ret_244 = vdot_s32(__s0_244, __s1_244, __builtin_bit_cast(int8x8_t, splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_244), __p3_244))); \ + __ret_244 = vdotq_s32(__s0_244, __s1_244, __builtin_bit_cast(int8x16_t, splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_244), __p3_244))); \ __ret_244; \ }) #else -#define vdot_laneq_s32(__p0_245, __p1_245, __p2_245, __p3_245) __extension__ ({ \ - int32x2_t __ret_245; \ - int32x2_t __s0_245 = __p0_245; \ - int8x8_t __s1_245 = __p1_245; \ +#define vdotq_laneq_s32(__p0_245, __p1_245, __p2_245, __p3_245) __extension__ ({ \ + int32x4_t __ret_245; \ + int32x4_t __s0_245 = __p0_245; \ + int8x16_t __s1_245 = __p1_245; \ int8x16_t __s2_245 = __p2_245; \ - int32x2_t __rev0_245; __rev0_245 = __builtin_shufflevector(__s0_245, __s0_245, 1, 0); \ - int8x8_t __rev1_245; __rev1_245 = __builtin_shufflevector(__s1_245, __s1_245, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_245; __rev2_245 = __builtin_shufflevector(__s2_245, __s2_245, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_245 = __noswap_vdot_s32(__rev0_245, __rev1_245, __builtin_bit_cast(int8x8_t, __noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_245), __p3_245))); \ - __ret_245 = __builtin_shufflevector(__ret_245, __ret_245, 1, 0); \ + int32x4_t __rev0_245; __rev0_245 = __builtin_shufflevector(__s0_245, __s0_245, __lane_reverse_128_32); \ + int8x16_t __rev1_245; __rev1_245 = __builtin_shufflevector(__s1_245, __s1_245, __lane_reverse_128_8); \ + int8x16_t __rev2_245; __rev2_245 = __builtin_shufflevector(__s2_245, __s2_245, __lane_reverse_128_8); \ + __ret_245 = __noswap_vdotq_s32(__rev0_245, __rev1_245, __builtin_bit_cast(int8x16_t, __noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_245), __p3_245))); \ + __ret_245 = __builtin_shufflevector(__ret_245, __ret_245, __lane_reverse_128_32); \ __ret_245; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vdot_laneq_u32(__p0_246, __p1_246, __p2_246, __p3_246) __extension__ ({ \ + uint32x2_t __ret_246; \ + uint32x2_t __s0_246 = __p0_246; \ + uint8x8_t __s1_246 = __p1_246; \ + uint8x16_t __s2_246 = __p2_246; \ + __ret_246 = vdot_u32(__s0_246, __s1_246, __builtin_bit_cast(uint8x8_t, 
splat_laneq_u32(__builtin_bit_cast(uint32x4_t, __s2_246), __p3_246))); \ + __ret_246; \ +}) +#else +#define vdot_laneq_u32(__p0_247, __p1_247, __p2_247, __p3_247) __extension__ ({ \ + uint32x2_t __ret_247; \ + uint32x2_t __s0_247 = __p0_247; \ + uint8x8_t __s1_247 = __p1_247; \ + uint8x16_t __s2_247 = __p2_247; \ + uint32x2_t __rev0_247; __rev0_247 = __builtin_shufflevector(__s0_247, __s0_247, __lane_reverse_64_32); \ + uint8x8_t __rev1_247; __rev1_247 = __builtin_shufflevector(__s1_247, __s1_247, __lane_reverse_64_8); \ + uint8x16_t __rev2_247; __rev2_247 = __builtin_shufflevector(__s2_247, __s2_247, __lane_reverse_128_8); \ + __ret_247 = __noswap_vdot_u32(__rev0_247, __rev1_247, __builtin_bit_cast(uint8x8_t, __noswap_splat_laneq_u32(__builtin_bit_cast(uint32x4_t, __rev2_247), __p3_247))); \ + __ret_247 = __builtin_shufflevector(__ret_247, __ret_247, __lane_reverse_64_32); \ + __ret_247; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdot_laneq_s32(__p0_248, __p1_248, __p2_248, __p3_248) __extension__ ({ \ + int32x2_t __ret_248; \ + int32x2_t __s0_248 = __p0_248; \ + int8x8_t __s1_248 = __p1_248; \ + int8x16_t __s2_248 = __p2_248; \ + __ret_248 = vdot_s32(__s0_248, __s1_248, __builtin_bit_cast(int8x8_t, splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_248), __p3_248))); \ + __ret_248; \ +}) +#else +#define vdot_laneq_s32(__p0_249, __p1_249, __p2_249, __p3_249) __extension__ ({ \ + int32x2_t __ret_249; \ + int32x2_t __s0_249 = __p0_249; \ + int8x8_t __s1_249 = __p1_249; \ + int8x16_t __s2_249 = __p2_249; \ + int32x2_t __rev0_249; __rev0_249 = __builtin_shufflevector(__s0_249, __s0_249, __lane_reverse_64_32); \ + int8x8_t __rev1_249; __rev1_249 = __builtin_shufflevector(__s1_249, __s1_249, __lane_reverse_64_8); \ + int8x16_t __rev2_249; __rev2_249 = __builtin_shufflevector(__s2_249, __s2_249, __lane_reverse_128_8); \ + __ret_249 = __noswap_vdot_s32(__rev0_249, __rev1_249, __builtin_bit_cast(int8x8_t, __noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_249), __p3_249))); \ + __ret_249 = __builtin_shufflevector(__ret_249, __ret_249, __lane_reverse_64_32); \ + __ret_249; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x4_t vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlalq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlalq_high_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x4_t vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vfmlalq_high_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, 
__builtin_neon_vfmlalq_high_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlalq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlalq_high_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -43916,22 +44856,22 @@ __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlalq_high_f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlal_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlal_high_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vfmlal_high_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlal_high_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x2_t __noswap_vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlal_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlal_high_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -43939,22 +44879,22 @@ __ai __attribute__((target("fp16fml,neon"))) float32x2_t __noswap_vfmlal_high_f1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x4_t vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlalq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlalq_low_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x4_t 
vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vfmlalq_low_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlalq_low_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlalq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlalq_low_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -43962,22 +44902,22 @@ __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlalq_low_f1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlal_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlal_low_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vfmlal_low_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlal_low_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x2_t __noswap_vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlal_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = 
__builtin_bit_cast(float32x2_t, __builtin_neon_vfmlal_low_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -43985,22 +44925,22 @@ __ai __attribute__((target("fp16fml,neon"))) float32x2_t __noswap_vfmlal_low_f16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x4_t vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlslq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlslq_high_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x4_t vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vfmlslq_high_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlslq_high_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlslq_high_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlslq_high_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -44008,22 +44948,22 @@ __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlslq_high_f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlsl_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlsl_high_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vfmlsl_high_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlsl_high_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x2_t __noswap_vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlsl_high_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlsl_high_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -44031,22 +44971,22 @@ __ai __attribute__((target("fp16fml,neon"))) float32x2_t __noswap_vfmlsl_high_f1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x4_t vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlslq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlslq_low_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x4_t vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vfmlslq_low_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlslq_low_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vfmlslq_low_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmlslq_low_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 41)); return __ret; } #endif @@ -44054,22 +44994,22 @@ __ai __attribute__((target("fp16fml,neon"))) float32x4_t __noswap_vfmlslq_low_f1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t 
__p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlsl_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlsl_low_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #else __ai __attribute__((target("fp16fml,neon"))) float32x2_t vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (float32x2_t) __builtin_neon_vfmlsl_low_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlsl_low_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("fp16fml,neon"))) float32x2_t __noswap_vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vfmlsl_low_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfmlsl_low_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 9)); return __ret; } #endif @@ -44083,10 +45023,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vdivq_f16(float16x8_t #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vdivq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -44100,10 +45040,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -44114,7 +45054,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t 
vdiv_f16(float16x4_t _ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_lane_f16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -44123,8 +45063,8 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__rev2, __p3); \ + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_lane_f16(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #define __noswap_vfmah_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -44132,7 +45072,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - __ret = (float16_t) __builtin_neon_vfmah_lane_f16(__s0, __s1, (float16x4_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_lane_f16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #endif @@ -44143,7 +45083,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_lane_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_lane_f16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 40)); \ __ret; \ }) #else @@ -44152,11 +45092,11 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vfmaq_lane_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_lane_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), __p3, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vfmaq_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -44164,7 +45104,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_lane_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_lane_f16(__builtin_bit_cast(int8x16_t, 
__s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 40)); \ __ret; \ }) #endif @@ -44175,7 +45115,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - __ret = (float16x4_t) __builtin_neon_vfma_lane_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_lane_f16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 8)); \ __ret; \ }) #else @@ -44184,11 +45124,11 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vfma_lane_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_lane_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), __p3, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vfma_lane_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -44196,7 +45136,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x4_t __s2 = __p2; \ - __ret = (float16x4_t) __builtin_neon_vfma_lane_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_lane_f16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 8)); \ __ret; \ }) #endif @@ -44207,7 +45147,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_laneq_f16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -44216,8 +45156,8 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16_t __s0 = __p0; \ float16_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__rev2, __p3); \ + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_laneq_f16(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #define __noswap_vfmah_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -44225,7 +45165,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16_t __s0 = __p0; 
\ float16_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - __ret = (float16_t) __builtin_neon_vfmah_laneq_f16(__s0, __s1, (float16x8_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vfmah_laneq_f16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #endif @@ -44236,7 +45176,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_laneq_f16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 40)); \ __ret; \ }) #else @@ -44245,11 +45185,11 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 40); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_laneq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 40)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #define __noswap_vfmaq_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -44257,7 +45197,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - __ret = (float16x8_t) __builtin_neon_vfmaq_laneq_f16((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 40); \ + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vfmaq_laneq_f16(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 40)); \ __ret; \ }) #endif @@ -44268,7 +45208,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - __ret = (float16x4_t) __builtin_neon_vfma_laneq_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_laneq_f16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 8)); \ __ret; \ }) #else @@ -44277,11 +45217,11 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 
7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16x4_t) __builtin_neon_vfma_laneq_f16((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 8); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_laneq_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 8)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #define __noswap_vfma_laneq_f16(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -44289,7 +45229,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16x8_t __s2 = __p2; \ - __ret = (float16x4_t) __builtin_neon_vfma_laneq_f16((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 8); \ + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vfma_laneq_f16(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 8)); \ __ret; \ }) #endif @@ -44309,10 +45249,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ __ret = __noswap_vfmaq_f16(__rev0, __rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -44332,152 +45272,152 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ __ret = __noswap_vfma_f16(__rev0, __rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsh_lane_f16(__p0_246, __p1_246, __p2_246, __p3_246) __extension__ ({ \ - float16_t __ret_246; \ - float16_t __s0_246 = __p0_246; \ - float16_t __s1_246 = __p1_246; \ - float16x4_t __s2_246 = __p2_246; \ - __ret_246 = vfmah_lane_f16(__s0_246, -__s1_246, __s2_246, __p3_246); \ - __ret_246; \ -}) -#else -#define vfmsh_lane_f16(__p0_247, __p1_247, __p2_247, __p3_247) __extension__ ({ \ - float16_t __ret_247; \ - float16_t __s0_247 = __p0_247; \ - float16_t __s1_247 = __p1_247; \ - float16x4_t __s2_247 = __p2_247; \ - float16x4_t 
__rev2_247; __rev2_247 = __builtin_shufflevector(__s2_247, __s2_247, 3, 2, 1, 0); \ - __ret_247 = __noswap_vfmah_lane_f16(__s0_247, -__s1_247, __rev2_247, __p3_247); \ - __ret_247; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f16(__p0_248, __p1_248, __p2_248, __p3_248) __extension__ ({ \ - float16x8_t __ret_248; \ - float16x8_t __s0_248 = __p0_248; \ - float16x8_t __s1_248 = __p1_248; \ - float16x4_t __s2_248 = __p2_248; \ - __ret_248 = vfmaq_lane_f16(__s0_248, -__s1_248, __s2_248, __p3_248); \ - __ret_248; \ -}) -#else -#define vfmsq_lane_f16(__p0_249, __p1_249, __p2_249, __p3_249) __extension__ ({ \ - float16x8_t __ret_249; \ - float16x8_t __s0_249 = __p0_249; \ - float16x8_t __s1_249 = __p1_249; \ - float16x4_t __s2_249 = __p2_249; \ - float16x8_t __rev0_249; __rev0_249 = __builtin_shufflevector(__s0_249, __s0_249, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_249; __rev1_249 = __builtin_shufflevector(__s1_249, __s1_249, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_249; __rev2_249 = __builtin_shufflevector(__s2_249, __s2_249, 3, 2, 1, 0); \ - __ret_249 = __noswap_vfmaq_lane_f16(__rev0_249, -__rev1_249, __rev2_249, __p3_249); \ - __ret_249 = __builtin_shufflevector(__ret_249, __ret_249, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_249; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f16(__p0_250, __p1_250, __p2_250, __p3_250) __extension__ ({ \ - float16x4_t __ret_250; \ - float16x4_t __s0_250 = __p0_250; \ - float16x4_t __s1_250 = __p1_250; \ +#define vfmsh_lane_f16(__p0_250, __p1_250, __p2_250, __p3_250) __extension__ ({ \ + float16_t __ret_250; \ + float16_t __s0_250 = __p0_250; \ + float16_t __s1_250 = __p1_250; \ float16x4_t __s2_250 = __p2_250; \ - __ret_250 = vfma_lane_f16(__s0_250, -__s1_250, __s2_250, __p3_250); \ + __ret_250 = vfmah_lane_f16(__s0_250, -__s1_250, __s2_250, __p3_250); \ __ret_250; \ }) #else -#define vfms_lane_f16(__p0_251, __p1_251, __p2_251, __p3_251) __extension__ ({ \ - float16x4_t __ret_251; \ - float16x4_t __s0_251 = __p0_251; \ - float16x4_t __s1_251 = __p1_251; \ +#define vfmsh_lane_f16(__p0_251, __p1_251, __p2_251, __p3_251) __extension__ ({ \ + float16_t __ret_251; \ + float16_t __s0_251 = __p0_251; \ + float16_t __s1_251 = __p1_251; \ float16x4_t __s2_251 = __p2_251; \ - float16x4_t __rev0_251; __rev0_251 = __builtin_shufflevector(__s0_251, __s0_251, 3, 2, 1, 0); \ - float16x4_t __rev1_251; __rev1_251 = __builtin_shufflevector(__s1_251, __s1_251, 3, 2, 1, 0); \ - float16x4_t __rev2_251; __rev2_251 = __builtin_shufflevector(__s2_251, __s2_251, 3, 2, 1, 0); \ - __ret_251 = __noswap_vfma_lane_f16(__rev0_251, -__rev1_251, __rev2_251, __p3_251); \ - __ret_251 = __builtin_shufflevector(__ret_251, __ret_251, 3, 2, 1, 0); \ + float16x4_t __rev2_251; __rev2_251 = __builtin_shufflevector(__s2_251, __s2_251, __lane_reverse_64_16); \ + __ret_251 = __noswap_vfmah_lane_f16(__s0_251, -__s1_251, __rev2_251, __p3_251); \ __ret_251; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsh_laneq_f16(__p0_252, __p1_252, __p2_252, __p3_252) __extension__ ({ \ - float16_t __ret_252; \ - float16_t __s0_252 = __p0_252; \ - float16_t __s1_252 = __p1_252; \ - float16x8_t __s2_252 = __p2_252; \ - __ret_252 = vfmah_laneq_f16(__s0_252, -__s1_252, __s2_252, __p3_252); \ +#define vfmsq_lane_f16(__p0_252, __p1_252, __p2_252, __p3_252) __extension__ ({ \ + float16x8_t __ret_252; \ + float16x8_t __s0_252 = __p0_252; \ + float16x8_t __s1_252 = __p1_252; \ + float16x4_t __s2_252 = __p2_252; \ + __ret_252 = vfmaq_lane_f16(__s0_252, -__s1_252, __s2_252, __p3_252); \ 
__ret_252; \ }) #else -#define vfmsh_laneq_f16(__p0_253, __p1_253, __p2_253, __p3_253) __extension__ ({ \ - float16_t __ret_253; \ - float16_t __s0_253 = __p0_253; \ - float16_t __s1_253 = __p1_253; \ - float16x8_t __s2_253 = __p2_253; \ - float16x8_t __rev2_253; __rev2_253 = __builtin_shufflevector(__s2_253, __s2_253, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_253 = __noswap_vfmah_laneq_f16(__s0_253, -__s1_253, __rev2_253, __p3_253); \ +#define vfmsq_lane_f16(__p0_253, __p1_253, __p2_253, __p3_253) __extension__ ({ \ + float16x8_t __ret_253; \ + float16x8_t __s0_253 = __p0_253; \ + float16x8_t __s1_253 = __p1_253; \ + float16x4_t __s2_253 = __p2_253; \ + float16x8_t __rev0_253; __rev0_253 = __builtin_shufflevector(__s0_253, __s0_253, __lane_reverse_128_16); \ + float16x8_t __rev1_253; __rev1_253 = __builtin_shufflevector(__s1_253, __s1_253, __lane_reverse_128_16); \ + float16x4_t __rev2_253; __rev2_253 = __builtin_shufflevector(__s2_253, __s2_253, __lane_reverse_64_16); \ + __ret_253 = __noswap_vfmaq_lane_f16(__rev0_253, -__rev1_253, __rev2_253, __p3_253); \ + __ret_253 = __builtin_shufflevector(__ret_253, __ret_253, __lane_reverse_128_16); \ __ret_253; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f16(__p0_254, __p1_254, __p2_254, __p3_254) __extension__ ({ \ - float16x8_t __ret_254; \ - float16x8_t __s0_254 = __p0_254; \ - float16x8_t __s1_254 = __p1_254; \ - float16x8_t __s2_254 = __p2_254; \ - __ret_254 = vfmaq_laneq_f16(__s0_254, -__s1_254, __s2_254, __p3_254); \ +#define vfms_lane_f16(__p0_254, __p1_254, __p2_254, __p3_254) __extension__ ({ \ + float16x4_t __ret_254; \ + float16x4_t __s0_254 = __p0_254; \ + float16x4_t __s1_254 = __p1_254; \ + float16x4_t __s2_254 = __p2_254; \ + __ret_254 = vfma_lane_f16(__s0_254, -__s1_254, __s2_254, __p3_254); \ __ret_254; \ }) #else -#define vfmsq_laneq_f16(__p0_255, __p1_255, __p2_255, __p3_255) __extension__ ({ \ - float16x8_t __ret_255; \ - float16x8_t __s0_255 = __p0_255; \ - float16x8_t __s1_255 = __p1_255; \ - float16x8_t __s2_255 = __p2_255; \ - float16x8_t __rev0_255; __rev0_255 = __builtin_shufflevector(__s0_255, __s0_255, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_255; __rev1_255 = __builtin_shufflevector(__s1_255, __s1_255, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_255; __rev2_255 = __builtin_shufflevector(__s2_255, __s2_255, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_255 = __noswap_vfmaq_laneq_f16(__rev0_255, -__rev1_255, __rev2_255, __p3_255); \ - __ret_255 = __builtin_shufflevector(__ret_255, __ret_255, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vfms_lane_f16(__p0_255, __p1_255, __p2_255, __p3_255) __extension__ ({ \ + float16x4_t __ret_255; \ + float16x4_t __s0_255 = __p0_255; \ + float16x4_t __s1_255 = __p1_255; \ + float16x4_t __s2_255 = __p2_255; \ + float16x4_t __rev0_255; __rev0_255 = __builtin_shufflevector(__s0_255, __s0_255, __lane_reverse_64_16); \ + float16x4_t __rev1_255; __rev1_255 = __builtin_shufflevector(__s1_255, __s1_255, __lane_reverse_64_16); \ + float16x4_t __rev2_255; __rev2_255 = __builtin_shufflevector(__s2_255, __s2_255, __lane_reverse_64_16); \ + __ret_255 = __noswap_vfma_lane_f16(__rev0_255, -__rev1_255, __rev2_255, __p3_255); \ + __ret_255 = __builtin_shufflevector(__ret_255, __ret_255, __lane_reverse_64_16); \ __ret_255; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f16(__p0_256, __p1_256, __p2_256, __p3_256) __extension__ ({ \ - float16x4_t __ret_256; \ - float16x4_t __s0_256 = __p0_256; \ - float16x4_t __s1_256 = __p1_256; \ +#define vfmsh_laneq_f16(__p0_256, __p1_256, __p2_256, __p3_256) 
__extension__ ({ \ + float16_t __ret_256; \ + float16_t __s0_256 = __p0_256; \ + float16_t __s1_256 = __p1_256; \ float16x8_t __s2_256 = __p2_256; \ - __ret_256 = vfma_laneq_f16(__s0_256, -__s1_256, __s2_256, __p3_256); \ + __ret_256 = vfmah_laneq_f16(__s0_256, -__s1_256, __s2_256, __p3_256); \ __ret_256; \ }) #else -#define vfms_laneq_f16(__p0_257, __p1_257, __p2_257, __p3_257) __extension__ ({ \ - float16x4_t __ret_257; \ - float16x4_t __s0_257 = __p0_257; \ - float16x4_t __s1_257 = __p1_257; \ +#define vfmsh_laneq_f16(__p0_257, __p1_257, __p2_257, __p3_257) __extension__ ({ \ + float16_t __ret_257; \ + float16_t __s0_257 = __p0_257; \ + float16_t __s1_257 = __p1_257; \ float16x8_t __s2_257 = __p2_257; \ - float16x4_t __rev0_257; __rev0_257 = __builtin_shufflevector(__s0_257, __s0_257, 3, 2, 1, 0); \ - float16x4_t __rev1_257; __rev1_257 = __builtin_shufflevector(__s1_257, __s1_257, 3, 2, 1, 0); \ - float16x8_t __rev2_257; __rev2_257 = __builtin_shufflevector(__s2_257, __s2_257, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_257 = __noswap_vfma_laneq_f16(__rev0_257, -__rev1_257, __rev2_257, __p3_257); \ - __ret_257 = __builtin_shufflevector(__ret_257, __ret_257, 3, 2, 1, 0); \ + float16x8_t __rev2_257; __rev2_257 = __builtin_shufflevector(__s2_257, __s2_257, __lane_reverse_128_16); \ + __ret_257 = __noswap_vfmah_laneq_f16(__s0_257, -__s1_257, __rev2_257, __p3_257); \ __ret_257; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vfmsq_laneq_f16(__p0_258, __p1_258, __p2_258, __p3_258) __extension__ ({ \ + float16x8_t __ret_258; \ + float16x8_t __s0_258 = __p0_258; \ + float16x8_t __s1_258 = __p1_258; \ + float16x8_t __s2_258 = __p2_258; \ + __ret_258 = vfmaq_laneq_f16(__s0_258, -__s1_258, __s2_258, __p3_258); \ + __ret_258; \ +}) +#else +#define vfmsq_laneq_f16(__p0_259, __p1_259, __p2_259, __p3_259) __extension__ ({ \ + float16x8_t __ret_259; \ + float16x8_t __s0_259 = __p0_259; \ + float16x8_t __s1_259 = __p1_259; \ + float16x8_t __s2_259 = __p2_259; \ + float16x8_t __rev0_259; __rev0_259 = __builtin_shufflevector(__s0_259, __s0_259, __lane_reverse_128_16); \ + float16x8_t __rev1_259; __rev1_259 = __builtin_shufflevector(__s1_259, __s1_259, __lane_reverse_128_16); \ + float16x8_t __rev2_259; __rev2_259 = __builtin_shufflevector(__s2_259, __s2_259, __lane_reverse_128_16); \ + __ret_259 = __noswap_vfmaq_laneq_f16(__rev0_259, -__rev1_259, __rev2_259, __p3_259); \ + __ret_259 = __builtin_shufflevector(__ret_259, __ret_259, __lane_reverse_128_16); \ + __ret_259; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfms_laneq_f16(__p0_260, __p1_260, __p2_260, __p3_260) __extension__ ({ \ + float16x4_t __ret_260; \ + float16x4_t __s0_260 = __p0_260; \ + float16x4_t __s1_260 = __p1_260; \ + float16x8_t __s2_260 = __p2_260; \ + __ret_260 = vfma_laneq_f16(__s0_260, -__s1_260, __s2_260, __p3_260); \ + __ret_260; \ +}) +#else +#define vfms_laneq_f16(__p0_261, __p1_261, __p2_261, __p3_261) __extension__ ({ \ + float16x4_t __ret_261; \ + float16x4_t __s0_261 = __p0_261; \ + float16x4_t __s1_261 = __p1_261; \ + float16x8_t __s2_261 = __p2_261; \ + float16x4_t __rev0_261; __rev0_261 = __builtin_shufflevector(__s0_261, __s0_261, __lane_reverse_64_16); \ + float16x4_t __rev1_261; __rev1_261 = __builtin_shufflevector(__s1_261, __s1_261, __lane_reverse_64_16); \ + float16x8_t __rev2_261; __rev2_261 = __builtin_shufflevector(__s2_261, __s2_261, __lane_reverse_128_16); \ + __ret_261 = __noswap_vfma_laneq_f16(__rev0_261, -__rev1_261, __rev2_261, __p3_261); \ + __ret_261 = __builtin_shufflevector(__ret_261, __ret_261, 
__lane_reverse_64_16); \ + __ret_261; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ #define vfmsq_n_f16(__p0, __p1, __p2) __extension__ ({ \ float16x8_t __ret; \ @@ -44493,10 +45433,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x8_t __s0 = __p0; \ float16x8_t __s1 = __p1; \ float16_t __s2 = __p2; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ __ret = __noswap_vfmaq_f16(__rev0, -__rev1, (float16x8_t) {__s2, __s2, __s2, __s2, __s2, __s2, __s2, __s2}); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -44516,10 +45456,10 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ float16x4_t __s0 = __p0; \ float16x4_t __s1 = __p1; \ float16_t __s2 = __p2; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ __ret = __noswap_vfma_f16(__rev0, -__rev1, (float16x4_t) {__s2, __s2, __s2, __s2}); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -44528,15 +45468,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vmaxnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxnmvq_f16(__builtin_bit_cast(int8x16_t, __s0))); \ __ret; \ }) #else #define vmaxnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vmaxnmvq_f16((int8x16_t)__rev0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxnmvq_f16(__builtin_bit_cast(int8x16_t, __rev0))); \ __ret; \ }) #endif @@ -44545,15 +45485,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vmaxnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxnmv_f16(__builtin_bit_cast(int8x8_t, __s0))); \ __ret; \ }) #else #define vmaxnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vmaxnmv_f16((int8x8_t)__rev0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxnmv_f16(__builtin_bit_cast(int8x8_t, __rev0))); \ __ret; \ }) #endif @@ -44562,15 +45502,15 @@ __ai 
__attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vmaxvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxvq_f16(__builtin_bit_cast(int8x16_t, __s0))); \ __ret; \ }) #else #define vmaxvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vmaxvq_f16((int8x16_t)__rev0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxvq_f16(__builtin_bit_cast(int8x16_t, __rev0))); \ __ret; \ }) #endif @@ -44579,15 +45519,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vmaxv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vmaxv_f16((int8x8_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxv_f16(__builtin_bit_cast(int8x8_t, __s0))); \ __ret; \ }) #else #define vmaxv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vmaxv_f16((int8x8_t)__rev0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmaxv_f16(__builtin_bit_cast(int8x8_t, __rev0))); \ __ret; \ }) #endif @@ -44596,15 +45536,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vminnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminnmvq_f16(__builtin_bit_cast(int8x16_t, __s0))); \ __ret; \ }) #else #define vminnmvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vminnmvq_f16((int8x16_t)__rev0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminnmvq_f16(__builtin_bit_cast(int8x16_t, __rev0))); \ __ret; \ }) #endif @@ -44613,15 +45553,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vminnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminnmv_f16(__builtin_bit_cast(int8x8_t, __s0))); \ __ret; \ }) #else #define vminnmv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vminnmv_f16((int8x8_t)__rev0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminnmv_f16(__builtin_bit_cast(int8x8_t, __rev0))); \ __ret; \ }) #endif @@ -44630,15 +45570,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vminvq_f16(__p0) __extension__ 
({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminvq_f16(__builtin_bit_cast(int8x16_t, __s0))); \ __ret; \ }) #else #define vminvq_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vminvq_f16((int8x16_t)__rev0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminvq_f16(__builtin_bit_cast(int8x16_t, __rev0))); \ __ret; \ }) #endif @@ -44647,227 +45587,227 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vdiv_f16(float16x4_t _ #define vminv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vminv_f16((int8x8_t)__s0); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminv_f16(__builtin_bit_cast(int8x8_t, __s0))); \ __ret; \ }) #else #define vminv_f16(__p0) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vminv_f16((int8x8_t)__rev0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vminv_f16(__builtin_bit_cast(int8x8_t, __rev0))); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f16(__p0_258, __p1_258, __p2_258) __extension__ ({ \ - float16x8_t __ret_258; \ - float16x8_t __s0_258 = __p0_258; \ - float16x8_t __s1_258 = __p1_258; \ - __ret_258 = __s0_258 * splatq_laneq_f16(__s1_258, __p2_258); \ - __ret_258; \ -}) -#else -#define vmulq_laneq_f16(__p0_259, __p1_259, __p2_259) __extension__ ({ \ - float16x8_t __ret_259; \ - float16x8_t __s0_259 = __p0_259; \ - float16x8_t __s1_259 = __p1_259; \ - float16x8_t __rev0_259; __rev0_259 = __builtin_shufflevector(__s0_259, __s0_259, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_259; __rev1_259 = __builtin_shufflevector(__s1_259, __s1_259, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_259 = __rev0_259 * __noswap_splatq_laneq_f16(__rev1_259, __p2_259); \ - __ret_259 = __builtin_shufflevector(__ret_259, __ret_259, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_259; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_f16(__p0_260, __p1_260, __p2_260) __extension__ ({ \ - float16x4_t __ret_260; \ - float16x4_t __s0_260 = __p0_260; \ - float16x8_t __s1_260 = __p1_260; \ - __ret_260 = __s0_260 * splat_laneq_f16(__s1_260, __p2_260); \ - __ret_260; \ -}) -#else -#define vmul_laneq_f16(__p0_261, __p1_261, __p2_261) __extension__ ({ \ - float16x4_t __ret_261; \ - float16x4_t __s0_261 = __p0_261; \ - float16x8_t __s1_261 = __p1_261; \ - float16x4_t __rev0_261; __rev0_261 = __builtin_shufflevector(__s0_261, __s0_261, 3, 2, 1, 0); \ - float16x8_t __rev1_261; __rev1_261 = __builtin_shufflevector(__s1_261, __s1_261, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_261 = __rev0_261 * __noswap_splat_laneq_f16(__rev1_261, __p2_261); \ - __ret_261 = __builtin_shufflevector(__ret_261, __ret_261, 3, 2, 1, 0); \ - __ret_261; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("fullfp16,neon"))) float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { - float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vmulxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); - return 
__ret; -} -#else -__ai __attribute__((target("fullfp16,neon"))) float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { - float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vmulxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -__ai __attribute__((target("fullfp16,neon"))) float16x8_t __noswap_vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { - float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vmulxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("fullfp16,neon"))) float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) { - float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vmulx_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); - return __ret; -} -#else -__ai __attribute__((target("fullfp16,neon"))) float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) { - float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vmulx_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -__ai __attribute__((target("fullfp16,neon"))) float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) { - float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vmulx_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16_t __ret; \ - float16_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (float16x4_t)__s1, __p2); \ - __ret; \ -}) -#else -#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ - float16_t __ret; \ - float16_t __s0 = __p0; \ - float16x4_t __s1 = __p1; \ - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vmulxh_lane_f16(__s0, (float16x4_t)__rev1, __p2); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f16(__p0_262, __p1_262, __p2_262) __extension__ ({ \ +#define vmulq_laneq_f16(__p0_262, __p1_262, __p2_262) __extension__ ({ \ float16x8_t __ret_262; \ float16x8_t __s0_262 = __p0_262; \ - float16x4_t __s1_262 = __p1_262; \ - __ret_262 = vmulxq_f16(__s0_262, splatq_lane_f16(__s1_262, __p2_262)); \ + float16x8_t __s1_262 = __p1_262; \ + __ret_262 = __s0_262 * splatq_laneq_f16(__s1_262, __p2_262); \ __ret_262; \ }) #else -#define vmulxq_lane_f16(__p0_263, __p1_263, __p2_263) __extension__ ({ \ +#define vmulq_laneq_f16(__p0_263, __p1_263, __p2_263) __extension__ ({ \ float16x8_t __ret_263; \ float16x8_t __s0_263 = __p0_263; \ - float16x4_t __s1_263 = __p1_263; \ - float16x8_t __rev0_263; __rev0_263 = __builtin_shufflevector(__s0_263, __s0_263, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev1_263; __rev1_263 = __builtin_shufflevector(__s1_263, __s1_263, 3, 2, 1, 0); \ - __ret_263 = __noswap_vmulxq_f16(__rev0_263, __noswap_splatq_lane_f16(__rev1_263, __p2_263)); \ - __ret_263 = __builtin_shufflevector(__ret_263, __ret_263, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __s1_263 = __p1_263; \ + float16x8_t __rev0_263; __rev0_263 = 
__builtin_shufflevector(__s0_263, __s0_263, __lane_reverse_128_16); \ + float16x8_t __rev1_263; __rev1_263 = __builtin_shufflevector(__s1_263, __s1_263, __lane_reverse_128_16); \ + __ret_263 = __rev0_263 * __noswap_splatq_laneq_f16(__rev1_263, __p2_263); \ + __ret_263 = __builtin_shufflevector(__ret_263, __ret_263, __lane_reverse_128_16); \ __ret_263; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_lane_f16(__p0_264, __p1_264, __p2_264) __extension__ ({ \ +#define vmul_laneq_f16(__p0_264, __p1_264, __p2_264) __extension__ ({ \ float16x4_t __ret_264; \ float16x4_t __s0_264 = __p0_264; \ - float16x4_t __s1_264 = __p1_264; \ - __ret_264 = vmulx_f16(__s0_264, splat_lane_f16(__s1_264, __p2_264)); \ + float16x8_t __s1_264 = __p1_264; \ + __ret_264 = __s0_264 * splat_laneq_f16(__s1_264, __p2_264); \ __ret_264; \ }) #else -#define vmulx_lane_f16(__p0_265, __p1_265, __p2_265) __extension__ ({ \ +#define vmul_laneq_f16(__p0_265, __p1_265, __p2_265) __extension__ ({ \ float16x4_t __ret_265; \ float16x4_t __s0_265 = __p0_265; \ - float16x4_t __s1_265 = __p1_265; \ - float16x4_t __rev0_265; __rev0_265 = __builtin_shufflevector(__s0_265, __s0_265, 3, 2, 1, 0); \ - float16x4_t __rev1_265; __rev1_265 = __builtin_shufflevector(__s1_265, __s1_265, 3, 2, 1, 0); \ - __ret_265 = __noswap_vmulx_f16(__rev0_265, __noswap_splat_lane_f16(__rev1_265, __p2_265)); \ - __ret_265 = __builtin_shufflevector(__ret_265, __ret_265, 3, 2, 1, 0); \ + float16x8_t __s1_265 = __p1_265; \ + float16x4_t __rev0_265; __rev0_265 = __builtin_shufflevector(__s0_265, __s0_265, __lane_reverse_64_16); \ + float16x8_t __rev1_265; __rev1_265 = __builtin_shufflevector(__s1_265, __s1_265, __lane_reverse_128_16); \ + __ret_265 = __rev0_265 * __noswap_splat_laneq_f16(__rev1_265, __p2_265); \ + __ret_265 = __builtin_shufflevector(__ret_265, __ret_265, __lane_reverse_64_16); \ __ret_265; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ +__ai __attribute__((target("fullfp16,neon"))) float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmulxq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); + return __ret; +} +#else +__ai __attribute__((target("fullfp16,neon"))) float16x8_t vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmulxq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); + return __ret; +} +__ai __attribute__((target("fullfp16,neon"))) float16x8_t __noswap_vmulxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vmulxq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fullfp16,neon"))) float16x4_t vmulx_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmulx_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); + return __ret; +} +#else +__ai __attribute__((target("fullfp16,neon"))) float16x4_t vmulx_f16(float16x4_t __p0, 
float16x4_t __p1) { + float16x4_t __ret; + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmulx_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); + return __ret; +} +__ai __attribute__((target("fullfp16,neon"))) float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vmulx_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (float16x8_t)__s1, __p2); \ + float16x4_t __s1 = __p1; \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulxh_lane_f16(__s0, __s1, __p2)); \ __ret; \ }) #else -#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ +#define vmulxh_lane_f16(__p0, __p1, __p2) __extension__ ({ \ float16_t __ret; \ float16_t __s0 = __p0; \ - float16x8_t __s1 = __p1; \ - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vmulxh_laneq_f16(__s0, (float16x8_t)__rev1, __p2); \ + float16x4_t __s1 = __p1; \ + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulxh_lane_f16(__s0, __rev1, __p2)); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_laneq_f16(__p0_266, __p1_266, __p2_266) __extension__ ({ \ +#define vmulxq_lane_f16(__p0_266, __p1_266, __p2_266) __extension__ ({ \ float16x8_t __ret_266; \ float16x8_t __s0_266 = __p0_266; \ - float16x8_t __s1_266 = __p1_266; \ - __ret_266 = vmulxq_f16(__s0_266, splatq_laneq_f16(__s1_266, __p2_266)); \ + float16x4_t __s1_266 = __p1_266; \ + __ret_266 = vmulxq_f16(__s0_266, splatq_lane_f16(__s1_266, __p2_266)); \ __ret_266; \ }) #else -#define vmulxq_laneq_f16(__p0_267, __p1_267, __p2_267) __extension__ ({ \ +#define vmulxq_lane_f16(__p0_267, __p1_267, __p2_267) __extension__ ({ \ float16x8_t __ret_267; \ float16x8_t __s0_267 = __p0_267; \ - float16x8_t __s1_267 = __p1_267; \ - float16x8_t __rev0_267; __rev0_267 = __builtin_shufflevector(__s0_267, __s0_267, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_267; __rev1_267 = __builtin_shufflevector(__s1_267, __s1_267, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_267 = __noswap_vmulxq_f16(__rev0_267, __noswap_splatq_laneq_f16(__rev1_267, __p2_267)); \ - __ret_267 = __builtin_shufflevector(__ret_267, __ret_267, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __s1_267 = __p1_267; \ + float16x8_t __rev0_267; __rev0_267 = __builtin_shufflevector(__s0_267, __s0_267, __lane_reverse_128_16); \ + float16x4_t __rev1_267; __rev1_267 = __builtin_shufflevector(__s1_267, __s1_267, __lane_reverse_64_16); \ + __ret_267 = __noswap_vmulxq_f16(__rev0_267, __noswap_splatq_lane_f16(__rev1_267, __p2_267)); \ + __ret_267 = __builtin_shufflevector(__ret_267, __ret_267, __lane_reverse_128_16); \ __ret_267; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f16(__p0_268, __p1_268, __p2_268) __extension__ ({ \ +#define vmulx_lane_f16(__p0_268, __p1_268, __p2_268) __extension__ ({ \ float16x4_t 
__ret_268; \ float16x4_t __s0_268 = __p0_268; \ - float16x8_t __s1_268 = __p1_268; \ - __ret_268 = vmulx_f16(__s0_268, splat_laneq_f16(__s1_268, __p2_268)); \ + float16x4_t __s1_268 = __p1_268; \ + __ret_268 = vmulx_f16(__s0_268, splat_lane_f16(__s1_268, __p2_268)); \ __ret_268; \ }) #else -#define vmulx_laneq_f16(__p0_269, __p1_269, __p2_269) __extension__ ({ \ +#define vmulx_lane_f16(__p0_269, __p1_269, __p2_269) __extension__ ({ \ float16x4_t __ret_269; \ float16x4_t __s0_269 = __p0_269; \ - float16x8_t __s1_269 = __p1_269; \ - float16x4_t __rev0_269; __rev0_269 = __builtin_shufflevector(__s0_269, __s0_269, 3, 2, 1, 0); \ - float16x8_t __rev1_269; __rev1_269 = __builtin_shufflevector(__s1_269, __s1_269, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_269 = __noswap_vmulx_f16(__rev0_269, __noswap_splat_laneq_f16(__rev1_269, __p2_269)); \ - __ret_269 = __builtin_shufflevector(__ret_269, __ret_269, 3, 2, 1, 0); \ + float16x4_t __s1_269 = __p1_269; \ + float16x4_t __rev0_269; __rev0_269 = __builtin_shufflevector(__s0_269, __s0_269, __lane_reverse_64_16); \ + float16x4_t __rev1_269; __rev1_269 = __builtin_shufflevector(__s1_269, __s1_269, __lane_reverse_64_16); \ + __ret_269 = __noswap_vmulx_f16(__rev0_269, __noswap_splat_lane_f16(__rev1_269, __p2_269)); \ + __ret_269 = __builtin_shufflevector(__ret_269, __ret_269, __lane_reverse_64_16); \ __ret_269; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16_t __ret; \ + float16_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulxh_laneq_f16(__s0, __s1, __p2)); \ + __ret; \ +}) +#else +#define vmulxh_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16_t __ret; \ + float16_t __s0 = __p0; \ + float16x8_t __s1 = __p1; \ + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vmulxh_laneq_f16(__s0, __rev1, __p2)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulxq_laneq_f16(__p0_270, __p1_270, __p2_270) __extension__ ({ \ + float16x8_t __ret_270; \ + float16x8_t __s0_270 = __p0_270; \ + float16x8_t __s1_270 = __p1_270; \ + __ret_270 = vmulxq_f16(__s0_270, splatq_laneq_f16(__s1_270, __p2_270)); \ + __ret_270; \ +}) +#else +#define vmulxq_laneq_f16(__p0_271, __p1_271, __p2_271) __extension__ ({ \ + float16x8_t __ret_271; \ + float16x8_t __s0_271 = __p0_271; \ + float16x8_t __s1_271 = __p1_271; \ + float16x8_t __rev0_271; __rev0_271 = __builtin_shufflevector(__s0_271, __s0_271, __lane_reverse_128_16); \ + float16x8_t __rev1_271; __rev1_271 = __builtin_shufflevector(__s1_271, __s1_271, __lane_reverse_128_16); \ + __ret_271 = __noswap_vmulxq_f16(__rev0_271, __noswap_splatq_laneq_f16(__rev1_271, __p2_271)); \ + __ret_271 = __builtin_shufflevector(__ret_271, __ret_271, __lane_reverse_128_16); \ + __ret_271; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulx_laneq_f16(__p0_272, __p1_272, __p2_272) __extension__ ({ \ + float16x4_t __ret_272; \ + float16x4_t __s0_272 = __p0_272; \ + float16x8_t __s1_272 = __p1_272; \ + __ret_272 = vmulx_f16(__s0_272, splat_laneq_f16(__s1_272, __p2_272)); \ + __ret_272; \ +}) +#else +#define vmulx_laneq_f16(__p0_273, __p1_273, __p2_273) __extension__ ({ \ + float16x4_t __ret_273; \ + float16x4_t __s0_273 = __p0_273; \ + float16x8_t __s1_273 = __p1_273; \ + float16x4_t __rev0_273; __rev0_273 = __builtin_shufflevector(__s0_273, __s0_273, __lane_reverse_64_16); \ + float16x8_t __rev1_273; __rev1_273 = 
__builtin_shufflevector(__s1_273, __s1_273, __lane_reverse_128_16); \ + __ret_273 = __noswap_vmulx_f16(__rev0_273, __noswap_splat_laneq_f16(__rev1_273, __p2_273)); \ + __ret_273 = __builtin_shufflevector(__ret_273, __ret_273, __lane_reverse_64_16); \ + __ret_273; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ #define vmulxq_n_f16(__p0, __p1) __extension__ ({ \ float16x8_t __ret; \ @@ -44881,9 +45821,9 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t __noswap_vmulx_f16(flo float16x8_t __ret; \ float16x8_t __s0 = __p0; \ float16_t __s1 = __p1; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ __ret = __noswap_vmulxq_f16(__rev0, (float16x8_t) {__s1, __s1, __s1, __s1, __s1, __s1, __s1, __s1}); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -44901,9 +45841,9 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t __noswap_vmulx_f16(flo float16x4_t __ret; \ float16x4_t __s0 = __p0; \ float16_t __s1 = __p1; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ __ret = __noswap_vmulx_f16(__rev0, (float16x4_t) {__s1, __s1, __s1, __s1}); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif @@ -44911,16 +45851,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t __noswap_vmulx_f16(flo #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vpaddq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpaddq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vpaddq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpaddq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -44928,16 +45868,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpaddq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vpmaxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpmaxq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai 
__attribute__((target("fullfp16,neon"))) float16x8_t vpmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vpmaxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpmaxq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -44945,16 +45885,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpmaxq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vpmaxnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpmaxnmq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpmaxnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vpmaxnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpmaxnmq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -44962,16 +45902,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpmaxnmq_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vpmaxnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpmaxnm_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmaxnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vpmaxnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, 
__builtin_neon_vpmaxnm_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -44979,16 +45919,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpmaxnm_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vpminq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpminq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpminq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vpminq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpminq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -44996,16 +45936,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpminq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vpminnmq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpminnmq_f16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpminnmq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vpminnmq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vpminnmq_f16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -45013,16 +45953,16 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vpminnmq_f16(float16x8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vpminnm_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpminnm_f16(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 8)); 
return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpminnm_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vpminnm_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vpminnm_f16(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -45030,15 +45970,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vpminnm_f16(float16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndiq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vrndiq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndiq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndiq_f16(float16x8_t __p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vrndiq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vrndiq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -45046,15 +45986,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vrndiq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndi_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vrndi_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndi_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndi_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vrndi_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vrndi_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -45062,15 +46002,15 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vrndi_f16(float16x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vsqrtq_f16(float16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t) __builtin_neon_vsqrtq_f16((int8x16_t)__p0, 40); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vsqrtq_f16(__builtin_bit_cast(int8x16_t, __p0), 40)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x8_t vsqrtq_f16(float16x8_t 
__p0) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (float16x8_t) __builtin_neon_vsqrtq_f16((int8x16_t)__rev0, 40); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(float16x8_t, __builtin_neon_vsqrtq_f16(__builtin_bit_cast(int8x16_t, __rev0), 40)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -45078,159 +46018,159 @@ __ai __attribute__((target("fullfp16,neon"))) float16x8_t vsqrtq_f16(float16x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vsqrt_f16(float16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t) __builtin_neon_vsqrt_f16((int8x8_t)__p0, 8); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vsqrt_f16(__builtin_bit_cast(int8x8_t, __p0), 8)); return __ret; } #else __ai __attribute__((target("fullfp16,neon"))) float16x4_t vsqrt_f16(float16x4_t __p0) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float16x4_t) __builtin_neon_vsqrt_f16((int8x8_t)__rev0, 8); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(float16x4_t, __builtin_neon_vsqrt_f16(__builtin_bit_cast(int8x8_t, __rev0), 8)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vsudotq_laneq_s32(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ - int32x4_t __ret_270; \ - int32x4_t __s0_270 = __p0_270; \ - int8x16_t __s1_270 = __p1_270; \ - uint8x16_t __s2_270 = __p2_270; \ - __ret_270 = vusdotq_s32(__s0_270, (uint8x16_t)(splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_270), __p3_270)), __s1_270); \ - __ret_270; \ -}) -#else -#define vsudotq_laneq_s32(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ - int32x4_t __ret_271; \ - int32x4_t __s0_271 = __p0_271; \ - int8x16_t __s1_271 = __p1_271; \ - uint8x16_t __s2_271 = __p2_271; \ - int32x4_t __rev0_271; __rev0_271 = __builtin_shufflevector(__s0_271, __s0_271, 3, 2, 1, 0); \ - int8x16_t __rev1_271; __rev1_271 = __builtin_shufflevector(__s1_271, __s1_271, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_271; __rev2_271 = __builtin_shufflevector(__s2_271, __s2_271, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_271 = __noswap_vusdotq_s32(__rev0_271, (uint8x16_t)(__noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_271), __p3_271)), __rev1_271); \ - __ret_271 = __builtin_shufflevector(__ret_271, __ret_271, 3, 2, 1, 0); \ - __ret_271; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vsudot_laneq_s32(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ - int32x2_t __ret_272; \ - int32x2_t __s0_272 = __p0_272; \ - int8x8_t __s1_272 = __p1_272; \ - uint8x16_t __s2_272 = __p2_272; \ - __ret_272 = vusdot_s32(__s0_272, (uint8x8_t)(splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_272), __p3_272)), __s1_272); \ - __ret_272; \ -}) -#else -#define vsudot_laneq_s32(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ - int32x2_t __ret_273; \ - int32x2_t __s0_273 = __p0_273; \ - int8x8_t __s1_273 = __p1_273; \ - uint8x16_t __s2_273 = __p2_273; \ - int32x2_t __rev0_273; __rev0_273 = __builtin_shufflevector(__s0_273, 
__s0_273, 1, 0); \ - int8x8_t __rev1_273; __rev1_273 = __builtin_shufflevector(__s1_273, __s1_273, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_273; __rev2_273 = __builtin_shufflevector(__s2_273, __s2_273, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_273 = __noswap_vusdot_s32(__rev0_273, (uint8x8_t)(__noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_273), __p3_273)), __rev1_273); \ - __ret_273 = __builtin_shufflevector(__ret_273, __ret_273, 1, 0); \ - __ret_273; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vusdotq_laneq_s32(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ +#define vsudotq_laneq_s32(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ int32x4_t __ret_274; \ int32x4_t __s0_274 = __p0_274; \ - uint8x16_t __s1_274 = __p1_274; \ - int8x16_t __s2_274 = __p2_274; \ - __ret_274 = vusdotq_s32(__s0_274, __s1_274, (int8x16_t)(splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_274), __p3_274))); \ + int8x16_t __s1_274 = __p1_274; \ + uint8x16_t __s2_274 = __p2_274; \ + __ret_274 = vusdotq_s32(__s0_274, __builtin_bit_cast(uint8x16_t, splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_274), __p3_274)), __s1_274); \ __ret_274; \ }) #else -#define vusdotq_laneq_s32(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ +#define vsudotq_laneq_s32(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ int32x4_t __ret_275; \ int32x4_t __s0_275 = __p0_275; \ - uint8x16_t __s1_275 = __p1_275; \ - int8x16_t __s2_275 = __p2_275; \ - int32x4_t __rev0_275; __rev0_275 = __builtin_shufflevector(__s0_275, __s0_275, 3, 2, 1, 0); \ - uint8x16_t __rev1_275; __rev1_275 = __builtin_shufflevector(__s1_275, __s1_275, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_275; __rev2_275 = __builtin_shufflevector(__s2_275, __s2_275, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_275 = __noswap_vusdotq_s32(__rev0_275, __rev1_275, (int8x16_t)(__noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_275), __p3_275))); \ - __ret_275 = __builtin_shufflevector(__ret_275, __ret_275, 3, 2, 1, 0); \ + int8x16_t __s1_275 = __p1_275; \ + uint8x16_t __s2_275 = __p2_275; \ + int32x4_t __rev0_275; __rev0_275 = __builtin_shufflevector(__s0_275, __s0_275, __lane_reverse_128_32); \ + int8x16_t __rev1_275; __rev1_275 = __builtin_shufflevector(__s1_275, __s1_275, __lane_reverse_128_8); \ + uint8x16_t __rev2_275; __rev2_275 = __builtin_shufflevector(__s2_275, __s2_275, __lane_reverse_128_8); \ + __ret_275 = __noswap_vusdotq_s32(__rev0_275, __builtin_bit_cast(uint8x16_t, __noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_275), __p3_275)), __rev1_275); \ + __ret_275 = __builtin_shufflevector(__ret_275, __ret_275, __lane_reverse_128_32); \ __ret_275; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vusdot_laneq_s32(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ +#define vsudot_laneq_s32(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ int32x2_t __ret_276; \ int32x2_t __s0_276 = __p0_276; \ - uint8x8_t __s1_276 = __p1_276; \ - int8x16_t __s2_276 = __p2_276; \ - __ret_276 = vusdot_s32(__s0_276, __s1_276, (int8x8_t)(splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_276), __p3_276))); \ + int8x8_t __s1_276 = __p1_276; \ + uint8x16_t __s2_276 = __p2_276; \ + __ret_276 = vusdot_s32(__s0_276, __builtin_bit_cast(uint8x8_t, splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_276), __p3_276)), __s1_276); \ __ret_276; \ }) #else -#define vusdot_laneq_s32(__p0_277, __p1_277, __p2_277, __p3_277) 
__extension__ ({ \ +#define vsudot_laneq_s32(__p0_277, __p1_277, __p2_277, __p3_277) __extension__ ({ \ int32x2_t __ret_277; \ int32x2_t __s0_277 = __p0_277; \ - uint8x8_t __s1_277 = __p1_277; \ - int8x16_t __s2_277 = __p2_277; \ - int32x2_t __rev0_277; __rev0_277 = __builtin_shufflevector(__s0_277, __s0_277, 1, 0); \ - uint8x8_t __rev1_277; __rev1_277 = __builtin_shufflevector(__s1_277, __s1_277, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_277; __rev2_277 = __builtin_shufflevector(__s2_277, __s2_277, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_277 = __noswap_vusdot_s32(__rev0_277, __rev1_277, (int8x8_t)(__noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_277), __p3_277))); \ - __ret_277 = __builtin_shufflevector(__ret_277, __ret_277, 1, 0); \ + int8x8_t __s1_277 = __p1_277; \ + uint8x16_t __s2_277 = __p2_277; \ + int32x2_t __rev0_277; __rev0_277 = __builtin_shufflevector(__s0_277, __s0_277, __lane_reverse_64_32); \ + int8x8_t __rev1_277; __rev1_277 = __builtin_shufflevector(__s1_277, __s1_277, __lane_reverse_64_8); \ + uint8x16_t __rev2_277; __rev2_277 = __builtin_shufflevector(__s2_277, __s2_277, __lane_reverse_128_8); \ + __ret_277 = __noswap_vusdot_s32(__rev0_277, __builtin_bit_cast(uint8x8_t, __noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_277), __p3_277)), __rev1_277); \ + __ret_277 = __builtin_shufflevector(__ret_277, __ret_277, __lane_reverse_64_32); \ __ret_277; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vusdotq_laneq_s32(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ + int32x4_t __ret_278; \ + int32x4_t __s0_278 = __p0_278; \ + uint8x16_t __s1_278 = __p1_278; \ + int8x16_t __s2_278 = __p2_278; \ + __ret_278 = vusdotq_s32(__s0_278, __s1_278, __builtin_bit_cast(int8x16_t, splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_278), __p3_278))); \ + __ret_278; \ +}) +#else +#define vusdotq_laneq_s32(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ + int32x4_t __ret_279; \ + int32x4_t __s0_279 = __p0_279; \ + uint8x16_t __s1_279 = __p1_279; \ + int8x16_t __s2_279 = __p2_279; \ + int32x4_t __rev0_279; __rev0_279 = __builtin_shufflevector(__s0_279, __s0_279, __lane_reverse_128_32); \ + uint8x16_t __rev1_279; __rev1_279 = __builtin_shufflevector(__s1_279, __s1_279, __lane_reverse_128_8); \ + int8x16_t __rev2_279; __rev2_279 = __builtin_shufflevector(__s2_279, __s2_279, __lane_reverse_128_8); \ + __ret_279 = __noswap_vusdotq_s32(__rev0_279, __rev1_279, __builtin_bit_cast(int8x16_t, __noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_279), __p3_279))); \ + __ret_279 = __builtin_shufflevector(__ret_279, __ret_279, __lane_reverse_128_32); \ + __ret_279; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vusdot_laneq_s32(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ + int32x2_t __ret_280; \ + int32x2_t __s0_280 = __p0_280; \ + uint8x8_t __s1_280 = __p1_280; \ + int8x16_t __s2_280 = __p2_280; \ + __ret_280 = vusdot_s32(__s0_280, __s1_280, __builtin_bit_cast(int8x8_t, splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_280), __p3_280))); \ + __ret_280; \ +}) +#else +#define vusdot_laneq_s32(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ + int32x2_t __ret_281; \ + int32x2_t __s0_281 = __p0_281; \ + uint8x8_t __s1_281 = __p1_281; \ + int8x16_t __s2_281 = __p2_281; \ + int32x2_t __rev0_281; __rev0_281 = __builtin_shufflevector(__s0_281, __s0_281, __lane_reverse_64_32); \ + uint8x8_t __rev1_281; __rev1_281 = __builtin_shufflevector(__s1_281, __s1_281, __lane_reverse_64_8); \ + int8x16_t 
__rev2_281; __rev2_281 = __builtin_shufflevector(__s2_281, __s2_281, __lane_reverse_128_8); \ + __ret_281 = __noswap_vusdot_s32(__rev0_281, __rev1_281, __builtin_bit_cast(int8x8_t, __noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_281), __p3_281))); \ + __ret_281 = __builtin_shufflevector(__ret_281, __ret_281, __lane_reverse_64_32); \ + __ret_281; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vabdq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vabdq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vabdq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vabd_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vabd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } __ai __attribute__((target("neon"))) float64_t vabdd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vabdd_f64(__p0, __p1); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vabdd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) float32_t vabds_f32(float32_t __p0, float32_t __p1) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vabds_f32(__p0, __p1); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vabds_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vabsq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vabsq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -45238,32 +46178,32 @@ __ai __attribute__((target("neon"))) float64x2_t vabsq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) int64x2_t vabsq_s64(int64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vabsq_s64(int64x2_t __p0) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vabsq_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vabs_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vabs_s64(int64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vabs_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vabs_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai __attribute__((target("neon"))) int64_t vabsd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vabsd_s64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vabsd_s64(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -45275,10 +46215,10 @@ __ai __attribute__((target("neon"))) float64x2_t vaddq_f64(float64x2_t __p0, flo #else __ai __attribute__((target("neon"))) float64x2_t vaddq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 + __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -45290,17 +46230,17 @@ __ai __attribute__((target("neon"))) float64x1_t vadd_f64(float64x1_t __p0, floa } __ai __attribute__((target("neon"))) uint64_t vaddd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vaddd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vaddd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddd_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vaddd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) poly128_t vaddq_p128(poly128_t __p0, poly128_t __p1) { poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vaddq_p128(__p0, __p1); + __ret = __builtin_bit_cast(poly128_t, __builtin_neon_vaddq_p128(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -45312,11 +46252,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vaddhn_high_u32(uint16x4_t __p0, #else __ai __attribute__((target("neon"))) uint16x8_t vaddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t 
__ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vcombine_u16(__rev0, __noswap_vaddhn_u32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -45330,11 +46270,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vaddhn_high_u64(uint32x2_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vaddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vcombine_u32(__rev0, __noswap_vaddhn_u64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -45348,11 +46288,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vaddhn_high_u16(uint8x8_t __p0, #else __ai __attribute__((target("neon"))) uint8x16_t vaddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_u8(__rev0, __noswap_vaddhn_u16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -45366,11 +46306,11 @@ __ai __attribute__((target("neon"))) int16x8_t vaddhn_high_s32(int16x4_t __p0, i #else __ai __attribute__((target("neon"))) int16x8_t vaddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = 
__noswap_vcombine_s16(__rev0, __noswap_vaddhn_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -45384,11 +46324,11 @@ __ai __attribute__((target("neon"))) int32x4_t vaddhn_high_s64(int32x2_t __p0, i #else __ai __attribute__((target("neon"))) int32x4_t vaddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vcombine_s32(__rev0, __noswap_vaddhn_s64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -45402,11 +46342,11 @@ __ai __attribute__((target("neon"))) int8x16_t vaddhn_high_s16(int8x8_t __p0, in #else __ai __attribute__((target("neon"))) int8x16_t vaddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_s8(__rev0, __noswap_vaddhn_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -45414,14 +46354,14 @@ __ai __attribute__((target("neon"))) int8x16_t vaddhn_high_s16(int8x8_t __p0, in #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t vaddlvq_u8(uint8x16_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddlvq_u8(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddlvq_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vaddlvq_u8(uint8x16_t __p0) { uint16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vaddlvq_u8(__rev0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddlvq_u8(__rev0)); return __ret; } #endif @@ -45429,14 +46369,14 @@ __ai __attribute__((target("neon"))) uint16_t vaddlvq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64_t vaddlvq_u32(uint32x4_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddlvq_u32(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vaddlvq_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint64_t vaddlvq_u32(uint32x4_t __p0) { uint64_t __ret; - uint32x4_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint64_t) __builtin_neon_vaddlvq_u32(__rev0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vaddlvq_u32(__rev0)); return __ret; } #endif @@ -45444,14 +46384,14 @@ __ai __attribute__((target("neon"))) uint64_t vaddlvq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vaddlvq_u16(uint16x8_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddlvq_u16(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddlvq_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vaddlvq_u16(uint16x8_t __p0) { uint32_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint32_t) __builtin_neon_vaddlvq_u16(__rev0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddlvq_u16(__rev0)); return __ret; } #endif @@ -45459,14 +46399,14 @@ __ai __attribute__((target("neon"))) uint32_t vaddlvq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vaddlvq_s8(int8x16_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddlvq_s8(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vaddlvq_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vaddlvq_s8(int8x16_t __p0) { int16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vaddlvq_s8(__rev0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vaddlvq_s8(__rev0)); return __ret; } #endif @@ -45474,14 +46414,14 @@ __ai __attribute__((target("neon"))) int16_t vaddlvq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64_t vaddlvq_s32(int32x4_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddlvq_s32(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vaddlvq_s32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int64_t vaddlvq_s32(int32x4_t __p0) { int64_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int64_t) __builtin_neon_vaddlvq_s32(__rev0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vaddlvq_s32(__rev0)); return __ret; } #endif @@ -45489,14 +46429,14 @@ __ai __attribute__((target("neon"))) int64_t vaddlvq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32_t vaddlvq_s16(int16x8_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddlvq_s16(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddlvq_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int32_t vaddlvq_s16(int16x8_t __p0) { int32_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int32_t) __builtin_neon_vaddlvq_s16(__rev0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddlvq_s16(__rev0)); return __ret; } #endif @@ -45504,14 +46444,14 @@ __ai __attribute__((target("neon"))) int32_t 
vaddlvq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t vaddlv_u8(uint8x8_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddlv_u8(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddlv_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vaddlv_u8(uint8x8_t __p0) { uint16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vaddlv_u8(__rev0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddlv_u8(__rev0)); return __ret; } #endif @@ -45519,14 +46459,14 @@ __ai __attribute__((target("neon"))) uint16_t vaddlv_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64_t vaddlv_u32(uint32x2_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddlv_u32(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vaddlv_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint64_t vaddlv_u32(uint32x2_t __p0) { uint64_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64_t) __builtin_neon_vaddlv_u32(__rev0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vaddlv_u32(__rev0)); return __ret; } #endif @@ -45534,14 +46474,14 @@ __ai __attribute__((target("neon"))) uint64_t vaddlv_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vaddlv_u16(uint16x4_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddlv_u16(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddlv_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vaddlv_u16(uint16x4_t __p0) { uint32_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32_t) __builtin_neon_vaddlv_u16(__rev0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddlv_u16(__rev0)); return __ret; } #endif @@ -45549,14 +46489,14 @@ __ai __attribute__((target("neon"))) uint32_t vaddlv_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vaddlv_s8(int8x8_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddlv_s8(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vaddlv_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vaddlv_s8(int8x8_t __p0) { int16_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vaddlv_s8(__rev0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vaddlv_s8(__rev0)); return __ret; } #endif @@ -45564,14 +46504,14 @@ __ai __attribute__((target("neon"))) int16_t vaddlv_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64_t vaddlv_s32(int32x2_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddlv_s32(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vaddlv_s32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int64_t vaddlv_s32(int32x2_t __p0) { int64_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 
0); - __ret = (int64_t) __builtin_neon_vaddlv_s32(__rev0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vaddlv_s32(__rev0)); return __ret; } #endif @@ -45579,14 +46519,14 @@ __ai __attribute__((target("neon"))) int64_t vaddlv_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32_t vaddlv_s16(int16x4_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddlv_s16(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddlv_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int32_t vaddlv_s16(int16x4_t __p0) { int32_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32_t) __builtin_neon_vaddlv_s16(__rev0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddlv_s16(__rev0)); return __ret; } #endif @@ -45594,14 +46534,14 @@ __ai __attribute__((target("neon"))) int32_t vaddlv_s16(int16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8_t vaddvq_u8(uint8x16_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vaddvq_u8(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vaddvq_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8_t vaddvq_u8(uint8x16_t __p0) { uint8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8_t) __builtin_neon_vaddvq_u8(__rev0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vaddvq_u8(__rev0)); return __ret; } #endif @@ -45609,14 +46549,14 @@ __ai __attribute__((target("neon"))) uint8_t vaddvq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vaddvq_u32(uint32x4_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddvq_u32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddvq_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vaddvq_u32(uint32x4_t __p0) { uint32_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32_t) __builtin_neon_vaddvq_u32(__rev0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddvq_u32(__rev0)); return __ret; } #endif @@ -45624,14 +46564,14 @@ __ai __attribute__((target("neon"))) uint32_t vaddvq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64_t vaddvq_u64(uint64x2_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vaddvq_u64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vaddvq_u64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint64_t vaddvq_u64(uint64x2_t __p0) { uint64_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64_t) __builtin_neon_vaddvq_u64(__rev0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vaddvq_u64(__rev0)); return __ret; } #endif @@ -45639,14 +46579,14 @@ __ai __attribute__((target("neon"))) uint64_t vaddvq_u64(uint64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t 
vaddvq_u16(uint16x8_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vaddvq_u16(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddvq_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vaddvq_u16(uint16x8_t __p0) { uint16_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vaddvq_u16(__rev0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddvq_u16(__rev0)); return __ret; } #endif @@ -45654,14 +46594,14 @@ __ai __attribute__((target("neon"))) uint16_t vaddvq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8_t vaddvq_s8(int8x16_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vaddvq_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vaddvq_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int8_t vaddvq_s8(int8x16_t __p0) { int8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8_t) __builtin_neon_vaddvq_s8(__rev0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vaddvq_s8(__rev0)); return __ret; } #endif @@ -45669,14 +46609,14 @@ __ai __attribute__((target("neon"))) int8_t vaddvq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vaddvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vaddvq_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vaddvq_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vaddvq_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vaddvq_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vaddvq_f64(__rev0)); return __ret; } #endif @@ -45684,14 +46624,14 @@ __ai __attribute__((target("neon"))) float64_t vaddvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vaddvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vaddvq_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vaddvq_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vaddvq_f32(float32x4_t __p0) { float32_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32_t) __builtin_neon_vaddvq_f32(__rev0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vaddvq_f32(__rev0)); return __ret; } #endif @@ -45699,14 +46639,14 @@ __ai __attribute__((target("neon"))) float32_t vaddvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32_t vaddvq_s32(int32x4_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddvq_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddvq_s32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int32_t vaddvq_s32(int32x4_t __p0) { int32_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32_t) 
__builtin_neon_vaddvq_s32(__rev0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddvq_s32(__rev0)); return __ret; } #endif @@ -45714,14 +46654,14 @@ __ai __attribute__((target("neon"))) int32_t vaddvq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64_t vaddvq_s64(int64x2_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vaddvq_s64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vaddvq_s64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int64_t vaddvq_s64(int64x2_t __p0) { int64_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64_t) __builtin_neon_vaddvq_s64(__rev0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vaddvq_s64(__rev0)); return __ret; } #endif @@ -45729,14 +46669,14 @@ __ai __attribute__((target("neon"))) int64_t vaddvq_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vaddvq_s16(int16x8_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddvq_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vaddvq_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vaddvq_s16(int16x8_t __p0) { int16_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vaddvq_s16(__rev0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vaddvq_s16(__rev0)); return __ret; } #endif @@ -45744,14 +46684,14 @@ __ai __attribute__((target("neon"))) int16_t vaddvq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8_t vaddv_u8(uint8x8_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vaddv_u8(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vaddv_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8_t vaddv_u8(uint8x8_t __p0) { uint8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8_t) __builtin_neon_vaddv_u8(__rev0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vaddv_u8(__rev0)); return __ret; } #endif @@ -45759,14 +46699,14 @@ __ai __attribute__((target("neon"))) uint8_t vaddv_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vaddv_u32(uint32x2_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vaddv_u32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddv_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vaddv_u32(uint32x2_t __p0) { uint32_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32_t) __builtin_neon_vaddv_u32(__rev0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vaddv_u32(__rev0)); return __ret; } #endif @@ -45774,14 +46714,14 @@ __ai __attribute__((target("neon"))) uint32_t vaddv_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t vaddv_u16(uint16x4_t __p0) { uint16_t __ret; - __ret = (uint16_t) 
__builtin_neon_vaddv_u16(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddv_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vaddv_u16(uint16x4_t __p0) { uint16_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vaddv_u16(__rev0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vaddv_u16(__rev0)); return __ret; } #endif @@ -45789,14 +46729,14 @@ __ai __attribute__((target("neon"))) uint16_t vaddv_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8_t vaddv_s8(int8x8_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vaddv_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vaddv_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int8_t vaddv_s8(int8x8_t __p0) { int8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8_t) __builtin_neon_vaddv_s8(__rev0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vaddv_s8(__rev0)); return __ret; } #endif @@ -45804,14 +46744,14 @@ __ai __attribute__((target("neon"))) int8_t vaddv_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vaddv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vaddv_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vaddv_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vaddv_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vaddv_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vaddv_f32(__rev0)); return __ret; } #endif @@ -45819,14 +46759,14 @@ __ai __attribute__((target("neon"))) float32_t vaddv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32_t vaddv_s32(int32x2_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vaddv_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddv_s32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int32_t vaddv_s32(int32x2_t __p0) { int32_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32_t) __builtin_neon_vaddv_s32(__rev0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vaddv_s32(__rev0)); return __ret; } #endif @@ -45834,37 +46774,37 @@ __ai __attribute__((target("neon"))) int32_t vaddv_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vaddv_s16(int16x4_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vaddv_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vaddv_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vaddv_s16(int16x4_t __p0) { int16_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vaddv_s16(__rev0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16_t, 
__builtin_neon_vaddv_s16(__rev0)); return __ret; } #endif __ai __attribute__((target("neon"))) poly64x1_t vbsl_p64(uint64x1_t __p0, poly64x1_t __p1, poly64x1_t __p2) { poly64x1_t __ret; - __ret = (poly64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 6); + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 6)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) { poly64x2_t __ret; - __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 38); + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 38)); return __ret; } #else __ai __attribute__((target("neon"))) poly64x2_t vbslq_p64(uint64x2_t __p0, poly64x2_t __p1, poly64x2_t __p2) { poly64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - poly64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (poly64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 38); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + poly64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 38)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -45872,172 +46812,172 @@ __ai __attribute__((target("neon"))) poly64x2_t vbslq_p64(uint64x2_t __p0, poly6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vbslq_f64(uint64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float64x2_t) __builtin_neon_vbslq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vbslq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 42)); + 
__ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vbsl_f64(uint64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vbsl_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcageq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcageq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcageq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcageq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcage_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcage_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcaged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaged_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcaged_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcages_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcages_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcages_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcagtq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcagtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcagtq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcagtq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcagt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcagt_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcagtd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcagtd_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcagtd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcagts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcagts_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcagts_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcaleq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcaleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcaleq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcaleq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcale_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcale_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcaled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaled_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcaled_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcales_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcales_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcales_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = 
__builtin_bit_cast(uint64x2_t, __builtin_neon_vcaltq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcaltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcaltq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcaltq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcalt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcalt_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcaltd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcaltd_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcaltd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcalts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcalts_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcalts_f32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vceq_p64(poly64x1_t __p0, poly64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 == __p1); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46045,16 +46985,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vceqq_p64(poly64x2_t __p0, poly6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46062,16 +47002,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vceqq_u64(uint64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46079,88 +47019,88 @@ __ai __attribute__((target("neon"))) uint64x2_t vceqq_f64(float64x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 == __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 == __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 == __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vceq_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 == __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vceq_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 == __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vceq_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 == __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 == __p1); return __ret; } __ai __attribute__((target("neon"))) uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vceqd_u64(__p0, __p1)); 
return __ret; } __ai __attribute__((target("neon"))) uint64_t vceqd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqd_s64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vceqd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vceqd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqd_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vceqd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vceqs_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vceqs_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vceqs_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vceqz_p8(poly8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 4)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vceqz_p8(poly8x8_t __p0) { uint8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vceqz_p64(poly64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 6)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vceqzq_p8(poly8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 36)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vceqzq_p8(poly8x16_t __p0) { uint8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -46168,15 +47108,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vceqzq_p8(poly8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_p64(poly64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 38)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqzq_p64(poly64x2_t __p0) { uint64x2_t __ret; - poly64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 38)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46184,15 +47124,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_p64(poly64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vceqzq_u8(uint8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vceqzq_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -46200,15 +47140,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vceqzq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vceqzq_u32(uint32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vceqzq_u32(uint32x4_t __p0) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -46216,15 +47156,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vceqzq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_u64(uint64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqzq_u64(uint64x2_t __p0) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, 
__ret, __lane_reverse_128_64); return __ret; } #endif @@ -46232,15 +47172,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_u64(uint64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vceqzq_u16(uint16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vceqzq_u16(uint16x8_t __p0) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -46248,15 +47188,15 @@ __ai __attribute__((target("neon"))) uint16x8_t vceqzq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vceqzq_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vceqzq_s8(int8x16_t __p0) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -46264,15 +47204,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vceqzq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqzq_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46280,15 +47220,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vceqzq_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); + __ret = 
__builtin_bit_cast(uint32x4_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vceqzq_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -46296,15 +47236,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vceqzq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vceqzq_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vceqzq_s32(int32x4_t __p0) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -46312,15 +47252,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vceqzq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vceqzq_s64(int64x2_t __p0) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46328,15 +47268,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vceqzq_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vceqzq_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vceqzq_s16(int16x8_t __p0) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vceqzq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -46344,15 +47284,15 @@ __ai __attribute__((target("neon"))) uint16x8_t vceqzq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vceqz_u8(uint8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vceqz_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -46360,36 +47300,36 @@ __ai __attribute__((target("neon"))) uint8x8_t vceqz_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vceqz_u32(uint32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vceqz_u32(uint32x2_t __p0) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vceqz_u64(uint64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vceqz_u16(uint16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vceqz_u16(uint16x4_t __p0) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -46397,36 +47337,36 @@ __ai __attribute__((target("neon"))) uint16x4_t 
vceqz_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vceqz_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vceqz_s8(int8x8_t __p0) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vceqz_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vceqz_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vceqz_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -46434,73 +47374,73 @@ __ai __attribute__((target("neon"))) uint32x2_t vceqz_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vceqz_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vceqz_s32(int32x2_t __p0) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vceqz_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vceqz_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) 
__builtin_neon_vceqz_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vceqz_s16(int16x4_t __p0) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vceqz_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif __ai __attribute__((target("neon"))) uint64_t vceqzd_u64(uint64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vceqzd_u64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vceqzd_s64(int64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqzd_s64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vceqzd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vceqzd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vceqzd_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vceqzd_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vceqzs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vceqzs_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vceqzs_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgeq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46508,16 +47448,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgeq_u64(uint64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgeq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = 
__builtin_bit_cast(uint64x2_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46525,67 +47465,67 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgeq_f64(float64x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 >= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgeq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 >= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 >= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcge_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 >= __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vcge_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 >= __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 >= __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 >= __p1); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcged_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcged_s64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcged_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcged_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcged_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcged_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcged_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcged_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcges_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcges_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcges_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcgezq_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcgezq_s8(int8x16_t __p0) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 
9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -46593,15 +47533,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vcgezq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgezq_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgezq_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46609,15 +47549,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgezq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgezq_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgezq_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -46625,15 +47565,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgezq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgezq_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgezq_s32(int32x4_t __p0) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -46641,15 +47581,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgezq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) uint64x2_t vcgezq_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgezq_s64(int64x2_t __p0) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46657,15 +47597,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgezq_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcgezq_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcgezq_s16(int16x8_t __p0) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcgezq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcgezq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -46673,36 +47613,36 @@ __ai __attribute__((target("neon"))) uint16x8_t vcgezq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcgez_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcgez_s8(int8x8_t __p0) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcgez_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcgez_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai 
__attribute__((target("neon"))) uint32x2_t vcgez_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -46710,68 +47650,68 @@ __ai __attribute__((target("neon"))) uint32x2_t vcgez_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcgez_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcgez_s32(int32x2_t __p0) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcgez_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcgez_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcgez_s16(int16x4_t __p0) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcgez_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgez_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif __ai __attribute__((target("neon"))) uint64_t vcgezd_s64(int64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgezd_s64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcgezd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcgezd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgezd_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcgezd_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcgezs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgezs_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcgezs_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t 
vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgtq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46779,16 +47719,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgtq_u64(uint64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgtq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46796,67 +47736,67 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgtq_f64(float64x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 > __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgtq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 > __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 > __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcgt_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 > __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vcgt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 > __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 > __p1); + __ret = 
__builtin_bit_cast(uint64x1_t, __p0 > __p1); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcgtd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_s64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcgtd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcgtd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcgtd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtd_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcgtd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcgts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgts_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcgts_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcgtzq_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcgtzq_s8(int8x16_t __p0) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -46864,15 +47804,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vcgtzq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgtzq_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgtzq_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46880,15 +47820,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgtzq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgtzq_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai 
__attribute__((target("neon"))) uint32x4_t vcgtzq_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -46896,15 +47836,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgtzq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcgtzq_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcgtzq_s32(int32x4_t __p0) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -46912,15 +47852,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcgtzq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcgtzq_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcgtzq_s64(int64x2_t __p0) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -46928,15 +47868,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vcgtzq_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcgtzq_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcgtzq_s16(int16x8_t __p0) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcgtzq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, 
__builtin_neon_vcgtzq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -46944,36 +47884,36 @@ __ai __attribute__((target("neon"))) uint16x8_t vcgtzq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcgtz_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcgtz_s8(int8x8_t __p0) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcgtz_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcgtz_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcgtz_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -46981,68 +47921,68 @@ __ai __attribute__((target("neon"))) uint32x2_t vcgtz_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcgtz_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcgtz_s32(int32x2_t __p0) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcgtz_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 19); + __ret = 
__builtin_bit_cast(uint64x1_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcgtz_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcgtz_s16(int16x4_t __p0) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcgtz_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcgtz_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif __ai __attribute__((target("neon"))) uint64_t vcgtzd_s64(int64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtzd_s64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcgtzd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcgtzd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcgtzd_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcgtzd_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcgtzs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcgtzs_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcgtzs_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcleq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47050,16 +47990,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vcleq_u64(uint64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcleq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47067,67 +48007,67 @@ __ai __attribute__((target("neon"))) uint64x2_t vcleq_f64(float64x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 <= __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcleq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 <= __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 <= __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcle_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 <= __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vcle_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 <= __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vcle_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 <= __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 <= __p1); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcled_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcled_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_s64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcled_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcled_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcled_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcled_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcles_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcles_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcles_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vclezq_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vclezq_s8(int8x16_t __p0) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 48); - __ret = 
__builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -47135,15 +48075,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vclezq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vclezq_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vclezq_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47151,15 +48091,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vclezq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vclezq_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vclezq_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -47167,15 +48107,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vclezq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vclezq_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vclezq_s32(int32x4_t __p0) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -47183,15 +48123,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vclezq_s32(int32x4_t 
__p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vclezq_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vclezq_s64(int64x2_t __p0) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47199,15 +48139,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vclezq_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vclezq_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vclezq_s16(int16x8_t __p0) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vclezq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vclezq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -47215,36 +48155,36 @@ __ai __attribute__((target("neon"))) uint16x8_t vclezq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vclez_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vclez_s8(int8x8_t __p0) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vclez_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vclez_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __p0), 9)); 
return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vclez_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -47252,68 +48192,68 @@ __ai __attribute__((target("neon"))) uint32x2_t vclez_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vclez_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vclez_s32(int32x2_t __p0) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vclez_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vclez_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vclez_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vclez_s16(int16x4_t __p0) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vclez_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vclez_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif __ai __attribute__((target("neon"))) uint64_t vclezd_s64(int64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vclezd_s64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vclezd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vclezd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vclezd_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vclezd_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vclezs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclezs_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vclezs_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) 
uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcltq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47321,16 +48261,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vcltq_u64(uint64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcltq_f64(float64x2_t __p0, float64x2_t __p1) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47338,67 +48278,67 @@ __ai __attribute__((target("neon"))) uint64x2_t vcltq_f64(float64x2_t __p0, floa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint64x2_t, __p0 < __p1); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcltq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__rev0 < __rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __rev0 < __rev1); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vclt_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 < __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vclt_f64(float64x1_t __p0, float64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); + __ret = __builtin_bit_cast(uint64x1_t, __p0 < __p1); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vclt_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0 < __p1); 
+ __ret = __builtin_bit_cast(uint64x1_t, __p0 < __p1); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcltd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcltd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_s64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcltd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltd_f64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcltd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vclts_f32(float32_t __p0, float32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vclts_f32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vclts_f32(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vcltzq_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vcltzq_s8(int8x16_t __p0) { uint8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -47406,15 +48346,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vcltzq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcltzq_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcltzq_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47422,15 +48362,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vcltzq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcltzq_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai 
__attribute__((target("neon"))) uint32x4_t vcltzq_f32(float32x4_t __p0) { uint32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -47438,15 +48378,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcltzq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vcltzq_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __p0), 34)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vcltzq_s32(int32x4_t __p0) { uint32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __rev0), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -47454,15 +48394,15 @@ __ai __attribute__((target("neon"))) uint32x4_t vcltzq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcltzq_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcltzq_s64(int64x2_t __p0) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47470,15 +48410,15 @@ __ai __attribute__((target("neon"))) uint64x2_t vcltzq_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vcltzq_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__p0, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __p0), 33)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vcltzq_s16(int16x8_t __p0) { uint16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vcltzq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, 
__builtin_neon_vcltzq_v(__builtin_bit_cast(int8x16_t, __rev0), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -47486,36 +48426,36 @@ __ai __attribute__((target("neon"))) uint16x8_t vcltzq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vcltz_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vcltz_s8(int8x8_t __p0) { uint8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcltz_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcltz_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcltz_f32(float32x2_t __p0) { uint32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -47523,53 +48463,53 @@ __ai __attribute__((target("neon"))) uint32x2_t vcltz_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vcltz_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __p0), 2)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vcltz_s32(int32x2_t __p0) { uint32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __rev0), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcltz_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 19); + __ret = 
__builtin_bit_cast(uint64x1_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vcltz_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__p0, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __p0), 1)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vcltz_s16(int16x4_t __p0) { uint16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vcltz_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vcltz_v(__builtin_bit_cast(int8x8_t, __rev0), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif __ai __attribute__((target("neon"))) uint64_t vcltzd_s64(int64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltzd_s64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcltzd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcltzd_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcltzd_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcltzd_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcltzs_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcltzs_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcltzs_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -47582,7 +48522,7 @@ __ai __attribute__((target("neon"))) poly64x2_t vcombine_p64(poly64x1_t __p0, po __ai __attribute__((target("neon"))) poly64x2_t vcombine_p64(poly64x1_t __p0, poly64x1_t __p1) { poly64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -47597,966 +48537,1050 @@ __ai __attribute__((target("neon"))) float64x2_t vcombine_f64(float64x1_t __p0, __ai __attribute__((target("neon"))) float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { float64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p8(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ - poly8x16_t __ret_278; \ - poly8x16_t __s0_278 = __p0_278; \ - poly8x8_t __s2_278 = __p2_278; \ - __ret_278 = vsetq_lane_p8(vget_lane_p8(__s2_278, __p3_278), __s0_278, __p1_278); \ - __ret_278; \ -}) -#else -#define vcopyq_lane_p8(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ - poly8x16_t __ret_279; \ - poly8x16_t __s0_279 = __p0_279; \ - poly8x8_t __s2_279 = __p2_279; \ - poly8x16_t __rev0_279; __rev0_279 = __builtin_shufflevector(__s0_279, __s0_279, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev2_279; __rev2_279 = __builtin_shufflevector(__s2_279, __s2_279, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_279 = __noswap_vsetq_lane_p8(__noswap_vget_lane_p8(__rev2_279, __p3_279), __rev0_279, __p1_279); \ - __ret_279 = __builtin_shufflevector(__ret_279, __ret_279, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); 
\ - __ret_279; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p16(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ - poly16x8_t __ret_280; \ - poly16x8_t __s0_280 = __p0_280; \ - poly16x4_t __s2_280 = __p2_280; \ - __ret_280 = vsetq_lane_p16(vget_lane_p16(__s2_280, __p3_280), __s0_280, __p1_280); \ - __ret_280; \ -}) -#else -#define vcopyq_lane_p16(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ - poly16x8_t __ret_281; \ - poly16x8_t __s0_281 = __p0_281; \ - poly16x4_t __s2_281 = __p2_281; \ - poly16x8_t __rev0_281; __rev0_281 = __builtin_shufflevector(__s0_281, __s0_281, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __rev2_281; __rev2_281 = __builtin_shufflevector(__s2_281, __s2_281, 3, 2, 1, 0); \ - __ret_281 = __noswap_vsetq_lane_p16(__noswap_vget_lane_p16(__rev2_281, __p3_281), __rev0_281, __p1_281); \ - __ret_281 = __builtin_shufflevector(__ret_281, __ret_281, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_281; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u8(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ - uint8x16_t __ret_282; \ - uint8x16_t __s0_282 = __p0_282; \ - uint8x8_t __s2_282 = __p2_282; \ - __ret_282 = vsetq_lane_u8(vget_lane_u8(__s2_282, __p3_282), __s0_282, __p1_282); \ +#define vcopyq_lane_p8(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ + poly8x16_t __ret_282; \ + poly8x16_t __s0_282 = __p0_282; \ + poly8x8_t __s2_282 = __p2_282; \ + __ret_282 = vsetq_lane_p8(vget_lane_p8(__s2_282, __p3_282), __s0_282, __p1_282); \ __ret_282; \ }) #else -#define vcopyq_lane_u8(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ - uint8x16_t __ret_283; \ - uint8x16_t __s0_283 = __p0_283; \ - uint8x8_t __s2_283 = __p2_283; \ - uint8x16_t __rev0_283; __rev0_283 = __builtin_shufflevector(__s0_283, __s0_283, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_283; __rev2_283 = __builtin_shufflevector(__s2_283, __s2_283, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_283 = __noswap_vsetq_lane_u8(__noswap_vget_lane_u8(__rev2_283, __p3_283), __rev0_283, __p1_283); \ - __ret_283 = __builtin_shufflevector(__ret_283, __ret_283, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_lane_p8(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ + poly8x16_t __ret_283; \ + poly8x16_t __s0_283 = __p0_283; \ + poly8x8_t __s2_283 = __p2_283; \ + poly8x16_t __rev0_283; __rev0_283 = __builtin_shufflevector(__s0_283, __s0_283, __lane_reverse_128_8); \ + poly8x8_t __rev2_283; __rev2_283 = __builtin_shufflevector(__s2_283, __s2_283, __lane_reverse_64_8); \ + __ret_283 = __noswap_vsetq_lane_p8(__noswap_vget_lane_p8(__rev2_283, __p3_283), __rev0_283, __p1_283); \ + __ret_283 = __builtin_shufflevector(__ret_283, __ret_283, __lane_reverse_128_8); \ __ret_283; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u32(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ - uint32x4_t __ret_284; \ - uint32x4_t __s0_284 = __p0_284; \ - uint32x2_t __s2_284 = __p2_284; \ - __ret_284 = vsetq_lane_u32(vget_lane_u32(__s2_284, __p3_284), __s0_284, __p1_284); \ +#define vcopyq_lane_p16(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ + poly16x8_t __ret_284; \ + poly16x8_t __s0_284 = __p0_284; \ + poly16x4_t __s2_284 = __p2_284; \ + __ret_284 = vsetq_lane_p16(vget_lane_p16(__s2_284, __p3_284), __s0_284, __p1_284); \ __ret_284; \ }) #else -#define vcopyq_lane_u32(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ - uint32x4_t __ret_285; \ - uint32x4_t __s0_285 = __p0_285; \ - uint32x2_t __s2_285 
= __p2_285; \ - uint32x4_t __rev0_285; __rev0_285 = __builtin_shufflevector(__s0_285, __s0_285, 3, 2, 1, 0); \ - uint32x2_t __rev2_285; __rev2_285 = __builtin_shufflevector(__s2_285, __s2_285, 1, 0); \ - __ret_285 = __noswap_vsetq_lane_u32(__noswap_vget_lane_u32(__rev2_285, __p3_285), __rev0_285, __p1_285); \ - __ret_285 = __builtin_shufflevector(__ret_285, __ret_285, 3, 2, 1, 0); \ +#define vcopyq_lane_p16(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ + poly16x8_t __ret_285; \ + poly16x8_t __s0_285 = __p0_285; \ + poly16x4_t __s2_285 = __p2_285; \ + poly16x8_t __rev0_285; __rev0_285 = __builtin_shufflevector(__s0_285, __s0_285, __lane_reverse_128_16); \ + poly16x4_t __rev2_285; __rev2_285 = __builtin_shufflevector(__s2_285, __s2_285, __lane_reverse_64_16); \ + __ret_285 = __noswap_vsetq_lane_p16(__noswap_vget_lane_p16(__rev2_285, __p3_285), __rev0_285, __p1_285); \ + __ret_285 = __builtin_shufflevector(__ret_285, __ret_285, __lane_reverse_128_16); \ __ret_285; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u64(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ - uint64x2_t __ret_286; \ - uint64x2_t __s0_286 = __p0_286; \ - uint64x1_t __s2_286 = __p2_286; \ - __ret_286 = vsetq_lane_u64(vget_lane_u64(__s2_286, __p3_286), __s0_286, __p1_286); \ +#define vcopyq_lane_u8(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ + uint8x16_t __ret_286; \ + uint8x16_t __s0_286 = __p0_286; \ + uint8x8_t __s2_286 = __p2_286; \ + __ret_286 = vsetq_lane_u8(vget_lane_u8(__s2_286, __p3_286), __s0_286, __p1_286); \ __ret_286; \ }) #else -#define vcopyq_lane_u64(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ - uint64x2_t __ret_287; \ - uint64x2_t __s0_287 = __p0_287; \ - uint64x1_t __s2_287 = __p2_287; \ - uint64x2_t __rev0_287; __rev0_287 = __builtin_shufflevector(__s0_287, __s0_287, 1, 0); \ - __ret_287 = __noswap_vsetq_lane_u64(vget_lane_u64(__s2_287, __p3_287), __rev0_287, __p1_287); \ - __ret_287 = __builtin_shufflevector(__ret_287, __ret_287, 1, 0); \ +#define vcopyq_lane_u8(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ + uint8x16_t __ret_287; \ + uint8x16_t __s0_287 = __p0_287; \ + uint8x8_t __s2_287 = __p2_287; \ + uint8x16_t __rev0_287; __rev0_287 = __builtin_shufflevector(__s0_287, __s0_287, __lane_reverse_128_8); \ + uint8x8_t __rev2_287; __rev2_287 = __builtin_shufflevector(__s2_287, __s2_287, __lane_reverse_64_8); \ + __ret_287 = __noswap_vsetq_lane_u8(__noswap_vget_lane_u8(__rev2_287, __p3_287), __rev0_287, __p1_287); \ + __ret_287 = __builtin_shufflevector(__ret_287, __ret_287, __lane_reverse_128_8); \ __ret_287; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u16(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ - uint16x8_t __ret_288; \ - uint16x8_t __s0_288 = __p0_288; \ - uint16x4_t __s2_288 = __p2_288; \ - __ret_288 = vsetq_lane_u16(vget_lane_u16(__s2_288, __p3_288), __s0_288, __p1_288); \ +#define vcopyq_lane_u32(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ + uint32x4_t __ret_288; \ + uint32x4_t __s0_288 = __p0_288; \ + uint32x2_t __s2_288 = __p2_288; \ + __ret_288 = vsetq_lane_u32(vget_lane_u32(__s2_288, __p3_288), __s0_288, __p1_288); \ __ret_288; \ }) #else -#define vcopyq_lane_u16(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ - uint16x8_t __ret_289; \ - uint16x8_t __s0_289 = __p0_289; \ - uint16x4_t __s2_289 = __p2_289; \ - uint16x8_t __rev0_289; __rev0_289 = __builtin_shufflevector(__s0_289, __s0_289, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_289; __rev2_289 = 
__builtin_shufflevector(__s2_289, __s2_289, 3, 2, 1, 0); \ - __ret_289 = __noswap_vsetq_lane_u16(__noswap_vget_lane_u16(__rev2_289, __p3_289), __rev0_289, __p1_289); \ - __ret_289 = __builtin_shufflevector(__ret_289, __ret_289, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_lane_u32(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ + uint32x4_t __ret_289; \ + uint32x4_t __s0_289 = __p0_289; \ + uint32x2_t __s2_289 = __p2_289; \ + uint32x4_t __rev0_289; __rev0_289 = __builtin_shufflevector(__s0_289, __s0_289, __lane_reverse_128_32); \ + uint32x2_t __rev2_289; __rev2_289 = __builtin_shufflevector(__s2_289, __s2_289, __lane_reverse_64_32); \ + __ret_289 = __noswap_vsetq_lane_u32(__noswap_vget_lane_u32(__rev2_289, __p3_289), __rev0_289, __p1_289); \ + __ret_289 = __builtin_shufflevector(__ret_289, __ret_289, __lane_reverse_128_32); \ __ret_289; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s8(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ - int8x16_t __ret_290; \ - int8x16_t __s0_290 = __p0_290; \ - int8x8_t __s2_290 = __p2_290; \ - __ret_290 = vsetq_lane_s8(vget_lane_s8(__s2_290, __p3_290), __s0_290, __p1_290); \ +#define vcopyq_lane_u64(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ + uint64x2_t __ret_290; \ + uint64x2_t __s0_290 = __p0_290; \ + uint64x1_t __s2_290 = __p2_290; \ + __ret_290 = vsetq_lane_u64(vget_lane_u64(__s2_290, __p3_290), __s0_290, __p1_290); \ __ret_290; \ }) #else -#define vcopyq_lane_s8(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ - int8x16_t __ret_291; \ - int8x16_t __s0_291 = __p0_291; \ - int8x8_t __s2_291 = __p2_291; \ - int8x16_t __rev0_291; __rev0_291 = __builtin_shufflevector(__s0_291, __s0_291, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_291; __rev2_291 = __builtin_shufflevector(__s2_291, __s2_291, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_291 = __noswap_vsetq_lane_s8(__noswap_vget_lane_s8(__rev2_291, __p3_291), __rev0_291, __p1_291); \ - __ret_291 = __builtin_shufflevector(__ret_291, __ret_291, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_lane_u64(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ + uint64x2_t __ret_291; \ + uint64x2_t __s0_291 = __p0_291; \ + uint64x1_t __s2_291 = __p2_291; \ + uint64x2_t __rev0_291; __rev0_291 = __builtin_shufflevector(__s0_291, __s0_291, __lane_reverse_128_64); \ + __ret_291 = __noswap_vsetq_lane_u64(vget_lane_u64(__s2_291, __p3_291), __rev0_291, __p1_291); \ + __ret_291 = __builtin_shufflevector(__ret_291, __ret_291, __lane_reverse_128_64); \ __ret_291; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_f32(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ - float32x4_t __ret_292; \ - float32x4_t __s0_292 = __p0_292; \ - float32x2_t __s2_292 = __p2_292; \ - __ret_292 = vsetq_lane_f32(vget_lane_f32(__s2_292, __p3_292), __s0_292, __p1_292); \ +#define vcopyq_lane_u16(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ + uint16x8_t __ret_292; \ + uint16x8_t __s0_292 = __p0_292; \ + uint16x4_t __s2_292 = __p2_292; \ + __ret_292 = vsetq_lane_u16(vget_lane_u16(__s2_292, __p3_292), __s0_292, __p1_292); \ __ret_292; \ }) #else -#define vcopyq_lane_f32(__p0_293, __p1_293, __p2_293, __p3_293) __extension__ ({ \ - float32x4_t __ret_293; \ - float32x4_t __s0_293 = __p0_293; \ - float32x2_t __s2_293 = __p2_293; \ - float32x4_t __rev0_293; __rev0_293 = __builtin_shufflevector(__s0_293, __s0_293, 3, 2, 1, 0); \ - float32x2_t __rev2_293; __rev2_293 = __builtin_shufflevector(__s2_293, __s2_293, 1, 0); \ 
- __ret_293 = __noswap_vsetq_lane_f32(__noswap_vget_lane_f32(__rev2_293, __p3_293), __rev0_293, __p1_293); \ - __ret_293 = __builtin_shufflevector(__ret_293, __ret_293, 3, 2, 1, 0); \ +#define vcopyq_lane_u16(__p0_293, __p1_293, __p2_293, __p3_293) __extension__ ({ \ + uint16x8_t __ret_293; \ + uint16x8_t __s0_293 = __p0_293; \ + uint16x4_t __s2_293 = __p2_293; \ + uint16x8_t __rev0_293; __rev0_293 = __builtin_shufflevector(__s0_293, __s0_293, __lane_reverse_128_16); \ + uint16x4_t __rev2_293; __rev2_293 = __builtin_shufflevector(__s2_293, __s2_293, __lane_reverse_64_16); \ + __ret_293 = __noswap_vsetq_lane_u16(__noswap_vget_lane_u16(__rev2_293, __p3_293), __rev0_293, __p1_293); \ + __ret_293 = __builtin_shufflevector(__ret_293, __ret_293, __lane_reverse_128_16); \ __ret_293; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s32(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ - int32x4_t __ret_294; \ - int32x4_t __s0_294 = __p0_294; \ - int32x2_t __s2_294 = __p2_294; \ - __ret_294 = vsetq_lane_s32(vget_lane_s32(__s2_294, __p3_294), __s0_294, __p1_294); \ +#define vcopyq_lane_s8(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ + int8x16_t __ret_294; \ + int8x16_t __s0_294 = __p0_294; \ + int8x8_t __s2_294 = __p2_294; \ + __ret_294 = vsetq_lane_s8(vget_lane_s8(__s2_294, __p3_294), __s0_294, __p1_294); \ __ret_294; \ }) #else -#define vcopyq_lane_s32(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ - int32x4_t __ret_295; \ - int32x4_t __s0_295 = __p0_295; \ - int32x2_t __s2_295 = __p2_295; \ - int32x4_t __rev0_295; __rev0_295 = __builtin_shufflevector(__s0_295, __s0_295, 3, 2, 1, 0); \ - int32x2_t __rev2_295; __rev2_295 = __builtin_shufflevector(__s2_295, __s2_295, 1, 0); \ - __ret_295 = __noswap_vsetq_lane_s32(__noswap_vget_lane_s32(__rev2_295, __p3_295), __rev0_295, __p1_295); \ - __ret_295 = __builtin_shufflevector(__ret_295, __ret_295, 3, 2, 1, 0); \ +#define vcopyq_lane_s8(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ + int8x16_t __ret_295; \ + int8x16_t __s0_295 = __p0_295; \ + int8x8_t __s2_295 = __p2_295; \ + int8x16_t __rev0_295; __rev0_295 = __builtin_shufflevector(__s0_295, __s0_295, __lane_reverse_128_8); \ + int8x8_t __rev2_295; __rev2_295 = __builtin_shufflevector(__s2_295, __s2_295, __lane_reverse_64_8); \ + __ret_295 = __noswap_vsetq_lane_s8(__noswap_vget_lane_s8(__rev2_295, __p3_295), __rev0_295, __p1_295); \ + __ret_295 = __builtin_shufflevector(__ret_295, __ret_295, __lane_reverse_128_8); \ __ret_295; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s64(__p0_296, __p1_296, __p2_296, __p3_296) __extension__ ({ \ - int64x2_t __ret_296; \ - int64x2_t __s0_296 = __p0_296; \ - int64x1_t __s2_296 = __p2_296; \ - __ret_296 = vsetq_lane_s64(vget_lane_s64(__s2_296, __p3_296), __s0_296, __p1_296); \ +#define vcopyq_lane_f32(__p0_296, __p1_296, __p2_296, __p3_296) __extension__ ({ \ + float32x4_t __ret_296; \ + float32x4_t __s0_296 = __p0_296; \ + float32x2_t __s2_296 = __p2_296; \ + __ret_296 = vsetq_lane_f32(vget_lane_f32(__s2_296, __p3_296), __s0_296, __p1_296); \ __ret_296; \ }) #else -#define vcopyq_lane_s64(__p0_297, __p1_297, __p2_297, __p3_297) __extension__ ({ \ - int64x2_t __ret_297; \ - int64x2_t __s0_297 = __p0_297; \ - int64x1_t __s2_297 = __p2_297; \ - int64x2_t __rev0_297; __rev0_297 = __builtin_shufflevector(__s0_297, __s0_297, 1, 0); \ - __ret_297 = __noswap_vsetq_lane_s64(vget_lane_s64(__s2_297, __p3_297), __rev0_297, __p1_297); \ - __ret_297 = __builtin_shufflevector(__ret_297, __ret_297, 1, 0); \ +#define 
vcopyq_lane_f32(__p0_297, __p1_297, __p2_297, __p3_297) __extension__ ({ \ + float32x4_t __ret_297; \ + float32x4_t __s0_297 = __p0_297; \ + float32x2_t __s2_297 = __p2_297; \ + float32x4_t __rev0_297; __rev0_297 = __builtin_shufflevector(__s0_297, __s0_297, __lane_reverse_128_32); \ + float32x2_t __rev2_297; __rev2_297 = __builtin_shufflevector(__s2_297, __s2_297, __lane_reverse_64_32); \ + __ret_297 = __noswap_vsetq_lane_f32(__noswap_vget_lane_f32(__rev2_297, __p3_297), __rev0_297, __p1_297); \ + __ret_297 = __builtin_shufflevector(__ret_297, __ret_297, __lane_reverse_128_32); \ __ret_297; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s16(__p0_298, __p1_298, __p2_298, __p3_298) __extension__ ({ \ - int16x8_t __ret_298; \ - int16x8_t __s0_298 = __p0_298; \ - int16x4_t __s2_298 = __p2_298; \ - __ret_298 = vsetq_lane_s16(vget_lane_s16(__s2_298, __p3_298), __s0_298, __p1_298); \ +#define vcopyq_lane_s32(__p0_298, __p1_298, __p2_298, __p3_298) __extension__ ({ \ + int32x4_t __ret_298; \ + int32x4_t __s0_298 = __p0_298; \ + int32x2_t __s2_298 = __p2_298; \ + __ret_298 = vsetq_lane_s32(vget_lane_s32(__s2_298, __p3_298), __s0_298, __p1_298); \ __ret_298; \ }) #else -#define vcopyq_lane_s16(__p0_299, __p1_299, __p2_299, __p3_299) __extension__ ({ \ - int16x8_t __ret_299; \ - int16x8_t __s0_299 = __p0_299; \ - int16x4_t __s2_299 = __p2_299; \ - int16x8_t __rev0_299; __rev0_299 = __builtin_shufflevector(__s0_299, __s0_299, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_299; __rev2_299 = __builtin_shufflevector(__s2_299, __s2_299, 3, 2, 1, 0); \ - __ret_299 = __noswap_vsetq_lane_s16(__noswap_vget_lane_s16(__rev2_299, __p3_299), __rev0_299, __p1_299); \ - __ret_299 = __builtin_shufflevector(__ret_299, __ret_299, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_lane_s32(__p0_299, __p1_299, __p2_299, __p3_299) __extension__ ({ \ + int32x4_t __ret_299; \ + int32x4_t __s0_299 = __p0_299; \ + int32x2_t __s2_299 = __p2_299; \ + int32x4_t __rev0_299; __rev0_299 = __builtin_shufflevector(__s0_299, __s0_299, __lane_reverse_128_32); \ + int32x2_t __rev2_299; __rev2_299 = __builtin_shufflevector(__s2_299, __s2_299, __lane_reverse_64_32); \ + __ret_299 = __noswap_vsetq_lane_s32(__noswap_vget_lane_s32(__rev2_299, __p3_299), __rev0_299, __p1_299); \ + __ret_299 = __builtin_shufflevector(__ret_299, __ret_299, __lane_reverse_128_32); \ __ret_299; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_p8(__p0_300, __p1_300, __p2_300, __p3_300) __extension__ ({ \ - poly8x8_t __ret_300; \ - poly8x8_t __s0_300 = __p0_300; \ - poly8x8_t __s2_300 = __p2_300; \ - __ret_300 = vset_lane_p8(vget_lane_p8(__s2_300, __p3_300), __s0_300, __p1_300); \ +#define vcopyq_lane_s64(__p0_300, __p1_300, __p2_300, __p3_300) __extension__ ({ \ + int64x2_t __ret_300; \ + int64x2_t __s0_300 = __p0_300; \ + int64x1_t __s2_300 = __p2_300; \ + __ret_300 = vsetq_lane_s64(vget_lane_s64(__s2_300, __p3_300), __s0_300, __p1_300); \ __ret_300; \ }) #else -#define vcopy_lane_p8(__p0_301, __p1_301, __p2_301, __p3_301) __extension__ ({ \ - poly8x8_t __ret_301; \ - poly8x8_t __s0_301 = __p0_301; \ - poly8x8_t __s2_301 = __p2_301; \ - poly8x8_t __rev0_301; __rev0_301 = __builtin_shufflevector(__s0_301, __s0_301, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev2_301; __rev2_301 = __builtin_shufflevector(__s2_301, __s2_301, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_301 = __noswap_vset_lane_p8(__noswap_vget_lane_p8(__rev2_301, __p3_301), __rev0_301, __p1_301); \ - __ret_301 = __builtin_shufflevector(__ret_301, __ret_301, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define 
vcopyq_lane_s64(__p0_301, __p1_301, __p2_301, __p3_301) __extension__ ({ \ + int64x2_t __ret_301; \ + int64x2_t __s0_301 = __p0_301; \ + int64x1_t __s2_301 = __p2_301; \ + int64x2_t __rev0_301; __rev0_301 = __builtin_shufflevector(__s0_301, __s0_301, __lane_reverse_128_64); \ + __ret_301 = __noswap_vsetq_lane_s64(vget_lane_s64(__s2_301, __p3_301), __rev0_301, __p1_301); \ + __ret_301 = __builtin_shufflevector(__ret_301, __ret_301, __lane_reverse_128_64); \ __ret_301; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_p16(__p0_302, __p1_302, __p2_302, __p3_302) __extension__ ({ \ - poly16x4_t __ret_302; \ - poly16x4_t __s0_302 = __p0_302; \ - poly16x4_t __s2_302 = __p2_302; \ - __ret_302 = vset_lane_p16(vget_lane_p16(__s2_302, __p3_302), __s0_302, __p1_302); \ +#define vcopyq_lane_mf8(__p0_302, __p1_302, __p2_302, __p3_302) __extension__ ({ \ + mfloat8x16_t __ret_302; \ + mfloat8x16_t __s0_302 = __p0_302; \ + mfloat8x8_t __s2_302 = __p2_302; \ + __ret_302 = vsetq_lane_mf8(vget_lane_mf8(__s2_302, __p3_302), __s0_302, __p1_302); \ __ret_302; \ }) #else -#define vcopy_lane_p16(__p0_303, __p1_303, __p2_303, __p3_303) __extension__ ({ \ - poly16x4_t __ret_303; \ - poly16x4_t __s0_303 = __p0_303; \ - poly16x4_t __s2_303 = __p2_303; \ - poly16x4_t __rev0_303; __rev0_303 = __builtin_shufflevector(__s0_303, __s0_303, 3, 2, 1, 0); \ - poly16x4_t __rev2_303; __rev2_303 = __builtin_shufflevector(__s2_303, __s2_303, 3, 2, 1, 0); \ - __ret_303 = __noswap_vset_lane_p16(__noswap_vget_lane_p16(__rev2_303, __p3_303), __rev0_303, __p1_303); \ - __ret_303 = __builtin_shufflevector(__ret_303, __ret_303, 3, 2, 1, 0); \ +#define vcopyq_lane_mf8(__p0_303, __p1_303, __p2_303, __p3_303) __extension__ ({ \ + mfloat8x16_t __ret_303; \ + mfloat8x16_t __s0_303 = __p0_303; \ + mfloat8x8_t __s2_303 = __p2_303; \ + mfloat8x16_t __rev0_303; __rev0_303 = __builtin_shufflevector(__s0_303, __s0_303, __lane_reverse_128_8); \ + mfloat8x8_t __rev2_303; __rev2_303 = __builtin_shufflevector(__s2_303, __s2_303, __lane_reverse_64_8); \ + __ret_303 = __noswap_vsetq_lane_mf8(__noswap_vget_lane_mf8(__rev2_303, __p3_303), __rev0_303, __p1_303); \ + __ret_303 = __builtin_shufflevector(__ret_303, __ret_303, __lane_reverse_128_8); \ __ret_303; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u8(__p0_304, __p1_304, __p2_304, __p3_304) __extension__ ({ \ - uint8x8_t __ret_304; \ - uint8x8_t __s0_304 = __p0_304; \ - uint8x8_t __s2_304 = __p2_304; \ - __ret_304 = vset_lane_u8(vget_lane_u8(__s2_304, __p3_304), __s0_304, __p1_304); \ +#define vcopyq_lane_s16(__p0_304, __p1_304, __p2_304, __p3_304) __extension__ ({ \ + int16x8_t __ret_304; \ + int16x8_t __s0_304 = __p0_304; \ + int16x4_t __s2_304 = __p2_304; \ + __ret_304 = vsetq_lane_s16(vget_lane_s16(__s2_304, __p3_304), __s0_304, __p1_304); \ __ret_304; \ }) #else -#define vcopy_lane_u8(__p0_305, __p1_305, __p2_305, __p3_305) __extension__ ({ \ - uint8x8_t __ret_305; \ - uint8x8_t __s0_305 = __p0_305; \ - uint8x8_t __s2_305 = __p2_305; \ - uint8x8_t __rev0_305; __rev0_305 = __builtin_shufflevector(__s0_305, __s0_305, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_305; __rev2_305 = __builtin_shufflevector(__s2_305, __s2_305, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_305 = __noswap_vset_lane_u8(__noswap_vget_lane_u8(__rev2_305, __p3_305), __rev0_305, __p1_305); \ - __ret_305 = __builtin_shufflevector(__ret_305, __ret_305, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_lane_s16(__p0_305, __p1_305, __p2_305, __p3_305) __extension__ ({ \ + int16x8_t __ret_305; \ + int16x8_t __s0_305 = __p0_305; \ 
+ int16x4_t __s2_305 = __p2_305; \ + int16x8_t __rev0_305; __rev0_305 = __builtin_shufflevector(__s0_305, __s0_305, __lane_reverse_128_16); \ + int16x4_t __rev2_305; __rev2_305 = __builtin_shufflevector(__s2_305, __s2_305, __lane_reverse_64_16); \ + __ret_305 = __noswap_vsetq_lane_s16(__noswap_vget_lane_s16(__rev2_305, __p3_305), __rev0_305, __p1_305); \ + __ret_305 = __builtin_shufflevector(__ret_305, __ret_305, __lane_reverse_128_16); \ __ret_305; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u32(__p0_306, __p1_306, __p2_306, __p3_306) __extension__ ({ \ - uint32x2_t __ret_306; \ - uint32x2_t __s0_306 = __p0_306; \ - uint32x2_t __s2_306 = __p2_306; \ - __ret_306 = vset_lane_u32(vget_lane_u32(__s2_306, __p3_306), __s0_306, __p1_306); \ +#define vcopy_lane_p8(__p0_306, __p1_306, __p2_306, __p3_306) __extension__ ({ \ + poly8x8_t __ret_306; \ + poly8x8_t __s0_306 = __p0_306; \ + poly8x8_t __s2_306 = __p2_306; \ + __ret_306 = vset_lane_p8(vget_lane_p8(__s2_306, __p3_306), __s0_306, __p1_306); \ __ret_306; \ }) #else -#define vcopy_lane_u32(__p0_307, __p1_307, __p2_307, __p3_307) __extension__ ({ \ - uint32x2_t __ret_307; \ - uint32x2_t __s0_307 = __p0_307; \ - uint32x2_t __s2_307 = __p2_307; \ - uint32x2_t __rev0_307; __rev0_307 = __builtin_shufflevector(__s0_307, __s0_307, 1, 0); \ - uint32x2_t __rev2_307; __rev2_307 = __builtin_shufflevector(__s2_307, __s2_307, 1, 0); \ - __ret_307 = __noswap_vset_lane_u32(__noswap_vget_lane_u32(__rev2_307, __p3_307), __rev0_307, __p1_307); \ - __ret_307 = __builtin_shufflevector(__ret_307, __ret_307, 1, 0); \ +#define vcopy_lane_p8(__p0_307, __p1_307, __p2_307, __p3_307) __extension__ ({ \ + poly8x8_t __ret_307; \ + poly8x8_t __s0_307 = __p0_307; \ + poly8x8_t __s2_307 = __p2_307; \ + poly8x8_t __rev0_307; __rev0_307 = __builtin_shufflevector(__s0_307, __s0_307, __lane_reverse_64_8); \ + poly8x8_t __rev2_307; __rev2_307 = __builtin_shufflevector(__s2_307, __s2_307, __lane_reverse_64_8); \ + __ret_307 = __noswap_vset_lane_p8(__noswap_vget_lane_p8(__rev2_307, __p3_307), __rev0_307, __p1_307); \ + __ret_307 = __builtin_shufflevector(__ret_307, __ret_307, __lane_reverse_64_8); \ __ret_307; \ }) #endif -#define vcopy_lane_u64(__p0_308, __p1_308, __p2_308, __p3_308) __extension__ ({ \ - uint64x1_t __ret_308; \ - uint64x1_t __s0_308 = __p0_308; \ - uint64x1_t __s2_308 = __p2_308; \ - __ret_308 = vset_lane_u64(vget_lane_u64(__s2_308, __p3_308), __s0_308, __p1_308); \ +#ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_p16(__p0_308, __p1_308, __p2_308, __p3_308) __extension__ ({ \ + poly16x4_t __ret_308; \ + poly16x4_t __s0_308 = __p0_308; \ + poly16x4_t __s2_308 = __p2_308; \ + __ret_308 = vset_lane_p16(vget_lane_p16(__s2_308, __p3_308), __s0_308, __p1_308); \ __ret_308; \ }) -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u16(__p0_309, __p1_309, __p2_309, __p3_309) __extension__ ({ \ - uint16x4_t __ret_309; \ - uint16x4_t __s0_309 = __p0_309; \ - uint16x4_t __s2_309 = __p2_309; \ - __ret_309 = vset_lane_u16(vget_lane_u16(__s2_309, __p3_309), __s0_309, __p1_309); \ +#else +#define vcopy_lane_p16(__p0_309, __p1_309, __p2_309, __p3_309) __extension__ ({ \ + poly16x4_t __ret_309; \ + poly16x4_t __s0_309 = __p0_309; \ + poly16x4_t __s2_309 = __p2_309; \ + poly16x4_t __rev0_309; __rev0_309 = __builtin_shufflevector(__s0_309, __s0_309, __lane_reverse_64_16); \ + poly16x4_t __rev2_309; __rev2_309 = __builtin_shufflevector(__s2_309, __s2_309, __lane_reverse_64_16); \ + __ret_309 = __noswap_vset_lane_p16(__noswap_vget_lane_p16(__rev2_309, __p3_309), __rev0_309, 
__p1_309); \ + __ret_309 = __builtin_shufflevector(__ret_309, __ret_309, __lane_reverse_64_16); \ __ret_309; \ }) -#else -#define vcopy_lane_u16(__p0_310, __p1_310, __p2_310, __p3_310) __extension__ ({ \ - uint16x4_t __ret_310; \ - uint16x4_t __s0_310 = __p0_310; \ - uint16x4_t __s2_310 = __p2_310; \ - uint16x4_t __rev0_310; __rev0_310 = __builtin_shufflevector(__s0_310, __s0_310, 3, 2, 1, 0); \ - uint16x4_t __rev2_310; __rev2_310 = __builtin_shufflevector(__s2_310, __s2_310, 3, 2, 1, 0); \ - __ret_310 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_310, __p3_310), __rev0_310, __p1_310); \ - __ret_310 = __builtin_shufflevector(__ret_310, __ret_310, 3, 2, 1, 0); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_u8(__p0_310, __p1_310, __p2_310, __p3_310) __extension__ ({ \ + uint8x8_t __ret_310; \ + uint8x8_t __s0_310 = __p0_310; \ + uint8x8_t __s2_310 = __p2_310; \ + __ret_310 = vset_lane_u8(vget_lane_u8(__s2_310, __p3_310), __s0_310, __p1_310); \ __ret_310; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s8(__p0_311, __p1_311, __p2_311, __p3_311) __extension__ ({ \ - int8x8_t __ret_311; \ - int8x8_t __s0_311 = __p0_311; \ - int8x8_t __s2_311 = __p2_311; \ - __ret_311 = vset_lane_s8(vget_lane_s8(__s2_311, __p3_311), __s0_311, __p1_311); \ +#else +#define vcopy_lane_u8(__p0_311, __p1_311, __p2_311, __p3_311) __extension__ ({ \ + uint8x8_t __ret_311; \ + uint8x8_t __s0_311 = __p0_311; \ + uint8x8_t __s2_311 = __p2_311; \ + uint8x8_t __rev0_311; __rev0_311 = __builtin_shufflevector(__s0_311, __s0_311, __lane_reverse_64_8); \ + uint8x8_t __rev2_311; __rev2_311 = __builtin_shufflevector(__s2_311, __s2_311, __lane_reverse_64_8); \ + __ret_311 = __noswap_vset_lane_u8(__noswap_vget_lane_u8(__rev2_311, __p3_311), __rev0_311, __p1_311); \ + __ret_311 = __builtin_shufflevector(__ret_311, __ret_311, __lane_reverse_64_8); \ __ret_311; \ }) -#else -#define vcopy_lane_s8(__p0_312, __p1_312, __p2_312, __p3_312) __extension__ ({ \ - int8x8_t __ret_312; \ - int8x8_t __s0_312 = __p0_312; \ - int8x8_t __s2_312 = __p2_312; \ - int8x8_t __rev0_312; __rev0_312 = __builtin_shufflevector(__s0_312, __s0_312, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_312; __rev2_312 = __builtin_shufflevector(__s2_312, __s2_312, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_312 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_312, __p3_312), __rev0_312, __p1_312); \ - __ret_312 = __builtin_shufflevector(__ret_312, __ret_312, 7, 6, 5, 4, 3, 2, 1, 0); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_u32(__p0_312, __p1_312, __p2_312, __p3_312) __extension__ ({ \ + uint32x2_t __ret_312; \ + uint32x2_t __s0_312 = __p0_312; \ + uint32x2_t __s2_312 = __p2_312; \ + __ret_312 = vset_lane_u32(vget_lane_u32(__s2_312, __p3_312), __s0_312, __p1_312); \ __ret_312; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_f32(__p0_313, __p1_313, __p2_313, __p3_313) __extension__ ({ \ - float32x2_t __ret_313; \ - float32x2_t __s0_313 = __p0_313; \ - float32x2_t __s2_313 = __p2_313; \ - __ret_313 = vset_lane_f32(vget_lane_f32(__s2_313, __p3_313), __s0_313, __p1_313); \ +#else +#define vcopy_lane_u32(__p0_313, __p1_313, __p2_313, __p3_313) __extension__ ({ \ + uint32x2_t __ret_313; \ + uint32x2_t __s0_313 = __p0_313; \ + uint32x2_t __s2_313 = __p2_313; \ + uint32x2_t __rev0_313; __rev0_313 = __builtin_shufflevector(__s0_313, __s0_313, __lane_reverse_64_32); \ + uint32x2_t __rev2_313; __rev2_313 = __builtin_shufflevector(__s2_313, __s2_313, __lane_reverse_64_32); \ + __ret_313 = 
__noswap_vset_lane_u32(__noswap_vget_lane_u32(__rev2_313, __p3_313), __rev0_313, __p1_313); \ + __ret_313 = __builtin_shufflevector(__ret_313, __ret_313, __lane_reverse_64_32); \ __ret_313; \ }) -#else -#define vcopy_lane_f32(__p0_314, __p1_314, __p2_314, __p3_314) __extension__ ({ \ - float32x2_t __ret_314; \ - float32x2_t __s0_314 = __p0_314; \ - float32x2_t __s2_314 = __p2_314; \ - float32x2_t __rev0_314; __rev0_314 = __builtin_shufflevector(__s0_314, __s0_314, 1, 0); \ - float32x2_t __rev2_314; __rev2_314 = __builtin_shufflevector(__s2_314, __s2_314, 1, 0); \ - __ret_314 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_314, __p3_314), __rev0_314, __p1_314); \ - __ret_314 = __builtin_shufflevector(__ret_314, __ret_314, 1, 0); \ - __ret_314; \ -}) #endif +#define vcopy_lane_u64(__p0_314, __p1_314, __p2_314, __p3_314) __extension__ ({ \ + uint64x1_t __ret_314; \ + uint64x1_t __s0_314 = __p0_314; \ + uint64x1_t __s2_314 = __p2_314; \ + __ret_314 = vset_lane_u64(vget_lane_u64(__s2_314, __p3_314), __s0_314, __p1_314); \ + __ret_314; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s32(__p0_315, __p1_315, __p2_315, __p3_315) __extension__ ({ \ - int32x2_t __ret_315; \ - int32x2_t __s0_315 = __p0_315; \ - int32x2_t __s2_315 = __p2_315; \ - __ret_315 = vset_lane_s32(vget_lane_s32(__s2_315, __p3_315), __s0_315, __p1_315); \ +#define vcopy_lane_u16(__p0_315, __p1_315, __p2_315, __p3_315) __extension__ ({ \ + uint16x4_t __ret_315; \ + uint16x4_t __s0_315 = __p0_315; \ + uint16x4_t __s2_315 = __p2_315; \ + __ret_315 = vset_lane_u16(vget_lane_u16(__s2_315, __p3_315), __s0_315, __p1_315); \ __ret_315; \ }) #else -#define vcopy_lane_s32(__p0_316, __p1_316, __p2_316, __p3_316) __extension__ ({ \ - int32x2_t __ret_316; \ - int32x2_t __s0_316 = __p0_316; \ - int32x2_t __s2_316 = __p2_316; \ - int32x2_t __rev0_316; __rev0_316 = __builtin_shufflevector(__s0_316, __s0_316, 1, 0); \ - int32x2_t __rev2_316; __rev2_316 = __builtin_shufflevector(__s2_316, __s2_316, 1, 0); \ - __ret_316 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_316, __p3_316), __rev0_316, __p1_316); \ - __ret_316 = __builtin_shufflevector(__ret_316, __ret_316, 1, 0); \ +#define vcopy_lane_u16(__p0_316, __p1_316, __p2_316, __p3_316) __extension__ ({ \ + uint16x4_t __ret_316; \ + uint16x4_t __s0_316 = __p0_316; \ + uint16x4_t __s2_316 = __p2_316; \ + uint16x4_t __rev0_316; __rev0_316 = __builtin_shufflevector(__s0_316, __s0_316, __lane_reverse_64_16); \ + uint16x4_t __rev2_316; __rev2_316 = __builtin_shufflevector(__s2_316, __s2_316, __lane_reverse_64_16); \ + __ret_316 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_316, __p3_316), __rev0_316, __p1_316); \ + __ret_316 = __builtin_shufflevector(__ret_316, __ret_316, __lane_reverse_64_16); \ __ret_316; \ }) #endif -#define vcopy_lane_s64(__p0_317, __p1_317, __p2_317, __p3_317) __extension__ ({ \ - int64x1_t __ret_317; \ - int64x1_t __s0_317 = __p0_317; \ - int64x1_t __s2_317 = __p2_317; \ - __ret_317 = vset_lane_s64(vget_lane_s64(__s2_317, __p3_317), __s0_317, __p1_317); \ +#ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_s8(__p0_317, __p1_317, __p2_317, __p3_317) __extension__ ({ \ + int8x8_t __ret_317; \ + int8x8_t __s0_317 = __p0_317; \ + int8x8_t __s2_317 = __p2_317; \ + __ret_317 = vset_lane_s8(vget_lane_s8(__s2_317, __p3_317), __s0_317, __p1_317); \ __ret_317; \ }) -#ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s16(__p0_318, __p1_318, __p2_318, __p3_318) __extension__ ({ \ - int16x4_t __ret_318; \ - int16x4_t __s0_318 = __p0_318; \ - int16x4_t __s2_318 = __p2_318; \ - 
__ret_318 = vset_lane_s16(vget_lane_s16(__s2_318, __p3_318), __s0_318, __p1_318); \ +#else +#define vcopy_lane_s8(__p0_318, __p1_318, __p2_318, __p3_318) __extension__ ({ \ + int8x8_t __ret_318; \ + int8x8_t __s0_318 = __p0_318; \ + int8x8_t __s2_318 = __p2_318; \ + int8x8_t __rev0_318; __rev0_318 = __builtin_shufflevector(__s0_318, __s0_318, __lane_reverse_64_8); \ + int8x8_t __rev2_318; __rev2_318 = __builtin_shufflevector(__s2_318, __s2_318, __lane_reverse_64_8); \ + __ret_318 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_318, __p3_318), __rev0_318, __p1_318); \ + __ret_318 = __builtin_shufflevector(__ret_318, __ret_318, __lane_reverse_64_8); \ __ret_318; \ }) -#else -#define vcopy_lane_s16(__p0_319, __p1_319, __p2_319, __p3_319) __extension__ ({ \ - int16x4_t __ret_319; \ - int16x4_t __s0_319 = __p0_319; \ - int16x4_t __s2_319 = __p2_319; \ - int16x4_t __rev0_319; __rev0_319 = __builtin_shufflevector(__s0_319, __s0_319, 3, 2, 1, 0); \ - int16x4_t __rev2_319; __rev2_319 = __builtin_shufflevector(__s2_319, __s2_319, 3, 2, 1, 0); \ - __ret_319 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_319, __p3_319), __rev0_319, __p1_319); \ - __ret_319 = __builtin_shufflevector(__ret_319, __ret_319, 3, 2, 1, 0); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_f32(__p0_319, __p1_319, __p2_319, __p3_319) __extension__ ({ \ + float32x2_t __ret_319; \ + float32x2_t __s0_319 = __p0_319; \ + float32x2_t __s2_319 = __p2_319; \ + __ret_319 = vset_lane_f32(vget_lane_f32(__s2_319, __p3_319), __s0_319, __p1_319); \ __ret_319; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p8(__p0_320, __p1_320, __p2_320, __p3_320) __extension__ ({ \ - poly8x16_t __ret_320; \ - poly8x16_t __s0_320 = __p0_320; \ - poly8x16_t __s2_320 = __p2_320; \ - __ret_320 = vsetq_lane_p8(vgetq_lane_p8(__s2_320, __p3_320), __s0_320, __p1_320); \ +#else +#define vcopy_lane_f32(__p0_320, __p1_320, __p2_320, __p3_320) __extension__ ({ \ + float32x2_t __ret_320; \ + float32x2_t __s0_320 = __p0_320; \ + float32x2_t __s2_320 = __p2_320; \ + float32x2_t __rev0_320; __rev0_320 = __builtin_shufflevector(__s0_320, __s0_320, __lane_reverse_64_32); \ + float32x2_t __rev2_320; __rev2_320 = __builtin_shufflevector(__s2_320, __s2_320, __lane_reverse_64_32); \ + __ret_320 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_320, __p3_320), __rev0_320, __p1_320); \ + __ret_320 = __builtin_shufflevector(__ret_320, __ret_320, __lane_reverse_64_32); \ __ret_320; \ }) -#else -#define vcopyq_laneq_p8(__p0_321, __p1_321, __p2_321, __p3_321) __extension__ ({ \ - poly8x16_t __ret_321; \ - poly8x16_t __s0_321 = __p0_321; \ - poly8x16_t __s2_321 = __p2_321; \ - poly8x16_t __rev0_321; __rev0_321 = __builtin_shufflevector(__s0_321, __s0_321, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_321; __rev2_321 = __builtin_shufflevector(__s2_321, __s2_321, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_321 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_321, __p3_321), __rev0_321, __p1_321); \ - __ret_321 = __builtin_shufflevector(__ret_321, __ret_321, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_lane_s32(__p0_321, __p1_321, __p2_321, __p3_321) __extension__ ({ \ + int32x2_t __ret_321; \ + int32x2_t __s0_321 = __p0_321; \ + int32x2_t __s2_321 = __p2_321; \ + __ret_321 = vset_lane_s32(vget_lane_s32(__s2_321, __p3_321), __s0_321, __p1_321); \ __ret_321; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define 
vcopyq_laneq_p16(__p0_322, __p1_322, __p2_322, __p3_322) __extension__ ({ \ - poly16x8_t __ret_322; \ - poly16x8_t __s0_322 = __p0_322; \ - poly16x8_t __s2_322 = __p2_322; \ - __ret_322 = vsetq_lane_p16(vgetq_lane_p16(__s2_322, __p3_322), __s0_322, __p1_322); \ +#else +#define vcopy_lane_s32(__p0_322, __p1_322, __p2_322, __p3_322) __extension__ ({ \ + int32x2_t __ret_322; \ + int32x2_t __s0_322 = __p0_322; \ + int32x2_t __s2_322 = __p2_322; \ + int32x2_t __rev0_322; __rev0_322 = __builtin_shufflevector(__s0_322, __s0_322, __lane_reverse_64_32); \ + int32x2_t __rev2_322; __rev2_322 = __builtin_shufflevector(__s2_322, __s2_322, __lane_reverse_64_32); \ + __ret_322 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_322, __p3_322), __rev0_322, __p1_322); \ + __ret_322 = __builtin_shufflevector(__ret_322, __ret_322, __lane_reverse_64_32); \ __ret_322; \ }) -#else -#define vcopyq_laneq_p16(__p0_323, __p1_323, __p2_323, __p3_323) __extension__ ({ \ - poly16x8_t __ret_323; \ - poly16x8_t __s0_323 = __p0_323; \ - poly16x8_t __s2_323 = __p2_323; \ - poly16x8_t __rev0_323; __rev0_323 = __builtin_shufflevector(__s0_323, __s0_323, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev2_323; __rev2_323 = __builtin_shufflevector(__s2_323, __s2_323, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_323 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_323, __p3_323), __rev0_323, __p1_323); \ - __ret_323 = __builtin_shufflevector(__ret_323, __ret_323, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_323; \ -}) #endif +#define vcopy_lane_s64(__p0_323, __p1_323, __p2_323, __p3_323) __extension__ ({ \ + int64x1_t __ret_323; \ + int64x1_t __s0_323 = __p0_323; \ + int64x1_t __s2_323 = __p2_323; \ + __ret_323 = vset_lane_s64(vget_lane_s64(__s2_323, __p3_323), __s0_323, __p1_323); \ + __ret_323; \ +}) #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u8(__p0_324, __p1_324, __p2_324, __p3_324) __extension__ ({ \ - uint8x16_t __ret_324; \ - uint8x16_t __s0_324 = __p0_324; \ - uint8x16_t __s2_324 = __p2_324; \ - __ret_324 = vsetq_lane_u8(vgetq_lane_u8(__s2_324, __p3_324), __s0_324, __p1_324); \ +#define vcopy_lane_mf8(__p0_324, __p1_324, __p2_324, __p3_324) __extension__ ({ \ + mfloat8x8_t __ret_324; \ + mfloat8x8_t __s0_324 = __p0_324; \ + mfloat8x8_t __s2_324 = __p2_324; \ + __ret_324 = vset_lane_mf8(vget_lane_mf8(__s2_324, __p3_324), __s0_324, __p1_324); \ __ret_324; \ }) #else -#define vcopyq_laneq_u8(__p0_325, __p1_325, __p2_325, __p3_325) __extension__ ({ \ - uint8x16_t __ret_325; \ - uint8x16_t __s0_325 = __p0_325; \ - uint8x16_t __s2_325 = __p2_325; \ - uint8x16_t __rev0_325; __rev0_325 = __builtin_shufflevector(__s0_325, __s0_325, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_325; __rev2_325 = __builtin_shufflevector(__s2_325, __s2_325, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_325 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_325, __p3_325), __rev0_325, __p1_325); \ - __ret_325 = __builtin_shufflevector(__ret_325, __ret_325, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopy_lane_mf8(__p0_325, __p1_325, __p2_325, __p3_325) __extension__ ({ \ + mfloat8x8_t __ret_325; \ + mfloat8x8_t __s0_325 = __p0_325; \ + mfloat8x8_t __s2_325 = __p2_325; \ + mfloat8x8_t __rev0_325; __rev0_325 = __builtin_shufflevector(__s0_325, __s0_325, __lane_reverse_64_8); \ + mfloat8x8_t __rev2_325; __rev2_325 = __builtin_shufflevector(__s2_325, __s2_325, __lane_reverse_64_8); \ + __ret_325 = __noswap_vset_lane_mf8(__noswap_vget_lane_mf8(__rev2_325, __p3_325), __rev0_325, 
__p1_325); \ + __ret_325 = __builtin_shufflevector(__ret_325, __ret_325, __lane_reverse_64_8); \ __ret_325; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u32(__p0_326, __p1_326, __p2_326, __p3_326) __extension__ ({ \ - uint32x4_t __ret_326; \ - uint32x4_t __s0_326 = __p0_326; \ - uint32x4_t __s2_326 = __p2_326; \ - __ret_326 = vsetq_lane_u32(vgetq_lane_u32(__s2_326, __p3_326), __s0_326, __p1_326); \ +#define vcopy_lane_s16(__p0_326, __p1_326, __p2_326, __p3_326) __extension__ ({ \ + int16x4_t __ret_326; \ + int16x4_t __s0_326 = __p0_326; \ + int16x4_t __s2_326 = __p2_326; \ + __ret_326 = vset_lane_s16(vget_lane_s16(__s2_326, __p3_326), __s0_326, __p1_326); \ __ret_326; \ }) #else -#define vcopyq_laneq_u32(__p0_327, __p1_327, __p2_327, __p3_327) __extension__ ({ \ - uint32x4_t __ret_327; \ - uint32x4_t __s0_327 = __p0_327; \ - uint32x4_t __s2_327 = __p2_327; \ - uint32x4_t __rev0_327; __rev0_327 = __builtin_shufflevector(__s0_327, __s0_327, 3, 2, 1, 0); \ - uint32x4_t __rev2_327; __rev2_327 = __builtin_shufflevector(__s2_327, __s2_327, 3, 2, 1, 0); \ - __ret_327 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_327, __p3_327), __rev0_327, __p1_327); \ - __ret_327 = __builtin_shufflevector(__ret_327, __ret_327, 3, 2, 1, 0); \ +#define vcopy_lane_s16(__p0_327, __p1_327, __p2_327, __p3_327) __extension__ ({ \ + int16x4_t __ret_327; \ + int16x4_t __s0_327 = __p0_327; \ + int16x4_t __s2_327 = __p2_327; \ + int16x4_t __rev0_327; __rev0_327 = __builtin_shufflevector(__s0_327, __s0_327, __lane_reverse_64_16); \ + int16x4_t __rev2_327; __rev2_327 = __builtin_shufflevector(__s2_327, __s2_327, __lane_reverse_64_16); \ + __ret_327 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_327, __p3_327), __rev0_327, __p1_327); \ + __ret_327 = __builtin_shufflevector(__ret_327, __ret_327, __lane_reverse_64_16); \ __ret_327; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u64(__p0_328, __p1_328, __p2_328, __p3_328) __extension__ ({ \ - uint64x2_t __ret_328; \ - uint64x2_t __s0_328 = __p0_328; \ - uint64x2_t __s2_328 = __p2_328; \ - __ret_328 = vsetq_lane_u64(vgetq_lane_u64(__s2_328, __p3_328), __s0_328, __p1_328); \ +#define vcopyq_laneq_p8(__p0_328, __p1_328, __p2_328, __p3_328) __extension__ ({ \ + poly8x16_t __ret_328; \ + poly8x16_t __s0_328 = __p0_328; \ + poly8x16_t __s2_328 = __p2_328; \ + __ret_328 = vsetq_lane_p8(vgetq_lane_p8(__s2_328, __p3_328), __s0_328, __p1_328); \ __ret_328; \ }) #else -#define vcopyq_laneq_u64(__p0_329, __p1_329, __p2_329, __p3_329) __extension__ ({ \ - uint64x2_t __ret_329; \ - uint64x2_t __s0_329 = __p0_329; \ - uint64x2_t __s2_329 = __p2_329; \ - uint64x2_t __rev0_329; __rev0_329 = __builtin_shufflevector(__s0_329, __s0_329, 1, 0); \ - uint64x2_t __rev2_329; __rev2_329 = __builtin_shufflevector(__s2_329, __s2_329, 1, 0); \ - __ret_329 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_329, __p3_329), __rev0_329, __p1_329); \ - __ret_329 = __builtin_shufflevector(__ret_329, __ret_329, 1, 0); \ +#define vcopyq_laneq_p8(__p0_329, __p1_329, __p2_329, __p3_329) __extension__ ({ \ + poly8x16_t __ret_329; \ + poly8x16_t __s0_329 = __p0_329; \ + poly8x16_t __s2_329 = __p2_329; \ + poly8x16_t __rev0_329; __rev0_329 = __builtin_shufflevector(__s0_329, __s0_329, __lane_reverse_128_8); \ + poly8x16_t __rev2_329; __rev2_329 = __builtin_shufflevector(__s2_329, __s2_329, __lane_reverse_128_8); \ + __ret_329 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_329, __p3_329), __rev0_329, __p1_329); \ + __ret_329 = __builtin_shufflevector(__ret_329, 
__ret_329, __lane_reverse_128_8); \ __ret_329; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u16(__p0_330, __p1_330, __p2_330, __p3_330) __extension__ ({ \ - uint16x8_t __ret_330; \ - uint16x8_t __s0_330 = __p0_330; \ - uint16x8_t __s2_330 = __p2_330; \ - __ret_330 = vsetq_lane_u16(vgetq_lane_u16(__s2_330, __p3_330), __s0_330, __p1_330); \ +#define vcopyq_laneq_p16(__p0_330, __p1_330, __p2_330, __p3_330) __extension__ ({ \ + poly16x8_t __ret_330; \ + poly16x8_t __s0_330 = __p0_330; \ + poly16x8_t __s2_330 = __p2_330; \ + __ret_330 = vsetq_lane_p16(vgetq_lane_p16(__s2_330, __p3_330), __s0_330, __p1_330); \ __ret_330; \ }) #else -#define vcopyq_laneq_u16(__p0_331, __p1_331, __p2_331, __p3_331) __extension__ ({ \ - uint16x8_t __ret_331; \ - uint16x8_t __s0_331 = __p0_331; \ - uint16x8_t __s2_331 = __p2_331; \ - uint16x8_t __rev0_331; __rev0_331 = __builtin_shufflevector(__s0_331, __s0_331, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_331; __rev2_331 = __builtin_shufflevector(__s2_331, __s2_331, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_331 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_331, __p3_331), __rev0_331, __p1_331); \ - __ret_331 = __builtin_shufflevector(__ret_331, __ret_331, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_p16(__p0_331, __p1_331, __p2_331, __p3_331) __extension__ ({ \ + poly16x8_t __ret_331; \ + poly16x8_t __s0_331 = __p0_331; \ + poly16x8_t __s2_331 = __p2_331; \ + poly16x8_t __rev0_331; __rev0_331 = __builtin_shufflevector(__s0_331, __s0_331, __lane_reverse_128_16); \ + poly16x8_t __rev2_331; __rev2_331 = __builtin_shufflevector(__s2_331, __s2_331, __lane_reverse_128_16); \ + __ret_331 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_331, __p3_331), __rev0_331, __p1_331); \ + __ret_331 = __builtin_shufflevector(__ret_331, __ret_331, __lane_reverse_128_16); \ __ret_331; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s8(__p0_332, __p1_332, __p2_332, __p3_332) __extension__ ({ \ - int8x16_t __ret_332; \ - int8x16_t __s0_332 = __p0_332; \ - int8x16_t __s2_332 = __p2_332; \ - __ret_332 = vsetq_lane_s8(vgetq_lane_s8(__s2_332, __p3_332), __s0_332, __p1_332); \ +#define vcopyq_laneq_u8(__p0_332, __p1_332, __p2_332, __p3_332) __extension__ ({ \ + uint8x16_t __ret_332; \ + uint8x16_t __s0_332 = __p0_332; \ + uint8x16_t __s2_332 = __p2_332; \ + __ret_332 = vsetq_lane_u8(vgetq_lane_u8(__s2_332, __p3_332), __s0_332, __p1_332); \ __ret_332; \ }) #else -#define vcopyq_laneq_s8(__p0_333, __p1_333, __p2_333, __p3_333) __extension__ ({ \ - int8x16_t __ret_333; \ - int8x16_t __s0_333 = __p0_333; \ - int8x16_t __s2_333 = __p2_333; \ - int8x16_t __rev0_333; __rev0_333 = __builtin_shufflevector(__s0_333, __s0_333, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_333; __rev2_333 = __builtin_shufflevector(__s2_333, __s2_333, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_333 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_333, __p3_333), __rev0_333, __p1_333); \ - __ret_333 = __builtin_shufflevector(__ret_333, __ret_333, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_u8(__p0_333, __p1_333, __p2_333, __p3_333) __extension__ ({ \ + uint8x16_t __ret_333; \ + uint8x16_t __s0_333 = __p0_333; \ + uint8x16_t __s2_333 = __p2_333; \ + uint8x16_t __rev0_333; __rev0_333 = __builtin_shufflevector(__s0_333, __s0_333, __lane_reverse_128_8); \ + uint8x16_t __rev2_333; __rev2_333 = __builtin_shufflevector(__s2_333, __s2_333, __lane_reverse_128_8); \ + __ret_333 = 
__noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_333, __p3_333), __rev0_333, __p1_333); \ + __ret_333 = __builtin_shufflevector(__ret_333, __ret_333, __lane_reverse_128_8); \ __ret_333; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f32(__p0_334, __p1_334, __p2_334, __p3_334) __extension__ ({ \ - float32x4_t __ret_334; \ - float32x4_t __s0_334 = __p0_334; \ - float32x4_t __s2_334 = __p2_334; \ - __ret_334 = vsetq_lane_f32(vgetq_lane_f32(__s2_334, __p3_334), __s0_334, __p1_334); \ +#define vcopyq_laneq_u32(__p0_334, __p1_334, __p2_334, __p3_334) __extension__ ({ \ + uint32x4_t __ret_334; \ + uint32x4_t __s0_334 = __p0_334; \ + uint32x4_t __s2_334 = __p2_334; \ + __ret_334 = vsetq_lane_u32(vgetq_lane_u32(__s2_334, __p3_334), __s0_334, __p1_334); \ __ret_334; \ }) #else -#define vcopyq_laneq_f32(__p0_335, __p1_335, __p2_335, __p3_335) __extension__ ({ \ - float32x4_t __ret_335; \ - float32x4_t __s0_335 = __p0_335; \ - float32x4_t __s2_335 = __p2_335; \ - float32x4_t __rev0_335; __rev0_335 = __builtin_shufflevector(__s0_335, __s0_335, 3, 2, 1, 0); \ - float32x4_t __rev2_335; __rev2_335 = __builtin_shufflevector(__s2_335, __s2_335, 3, 2, 1, 0); \ - __ret_335 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_335, __p3_335), __rev0_335, __p1_335); \ - __ret_335 = __builtin_shufflevector(__ret_335, __ret_335, 3, 2, 1, 0); \ +#define vcopyq_laneq_u32(__p0_335, __p1_335, __p2_335, __p3_335) __extension__ ({ \ + uint32x4_t __ret_335; \ + uint32x4_t __s0_335 = __p0_335; \ + uint32x4_t __s2_335 = __p2_335; \ + uint32x4_t __rev0_335; __rev0_335 = __builtin_shufflevector(__s0_335, __s0_335, __lane_reverse_128_32); \ + uint32x4_t __rev2_335; __rev2_335 = __builtin_shufflevector(__s2_335, __s2_335, __lane_reverse_128_32); \ + __ret_335 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_335, __p3_335), __rev0_335, __p1_335); \ + __ret_335 = __builtin_shufflevector(__ret_335, __ret_335, __lane_reverse_128_32); \ __ret_335; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s32(__p0_336, __p1_336, __p2_336, __p3_336) __extension__ ({ \ - int32x4_t __ret_336; \ - int32x4_t __s0_336 = __p0_336; \ - int32x4_t __s2_336 = __p2_336; \ - __ret_336 = vsetq_lane_s32(vgetq_lane_s32(__s2_336, __p3_336), __s0_336, __p1_336); \ +#define vcopyq_laneq_u64(__p0_336, __p1_336, __p2_336, __p3_336) __extension__ ({ \ + uint64x2_t __ret_336; \ + uint64x2_t __s0_336 = __p0_336; \ + uint64x2_t __s2_336 = __p2_336; \ + __ret_336 = vsetq_lane_u64(vgetq_lane_u64(__s2_336, __p3_336), __s0_336, __p1_336); \ __ret_336; \ }) #else -#define vcopyq_laneq_s32(__p0_337, __p1_337, __p2_337, __p3_337) __extension__ ({ \ - int32x4_t __ret_337; \ - int32x4_t __s0_337 = __p0_337; \ - int32x4_t __s2_337 = __p2_337; \ - int32x4_t __rev0_337; __rev0_337 = __builtin_shufflevector(__s0_337, __s0_337, 3, 2, 1, 0); \ - int32x4_t __rev2_337; __rev2_337 = __builtin_shufflevector(__s2_337, __s2_337, 3, 2, 1, 0); \ - __ret_337 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_337, __p3_337), __rev0_337, __p1_337); \ - __ret_337 = __builtin_shufflevector(__ret_337, __ret_337, 3, 2, 1, 0); \ +#define vcopyq_laneq_u64(__p0_337, __p1_337, __p2_337, __p3_337) __extension__ ({ \ + uint64x2_t __ret_337; \ + uint64x2_t __s0_337 = __p0_337; \ + uint64x2_t __s2_337 = __p2_337; \ + uint64x2_t __rev0_337; __rev0_337 = __builtin_shufflevector(__s0_337, __s0_337, __lane_reverse_128_64); \ + uint64x2_t __rev2_337; __rev2_337 = __builtin_shufflevector(__s2_337, __s2_337, __lane_reverse_128_64); \ + __ret_337 = 
__noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_337, __p3_337), __rev0_337, __p1_337); \ + __ret_337 = __builtin_shufflevector(__ret_337, __ret_337, __lane_reverse_128_64); \ __ret_337; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s64(__p0_338, __p1_338, __p2_338, __p3_338) __extension__ ({ \ - int64x2_t __ret_338; \ - int64x2_t __s0_338 = __p0_338; \ - int64x2_t __s2_338 = __p2_338; \ - __ret_338 = vsetq_lane_s64(vgetq_lane_s64(__s2_338, __p3_338), __s0_338, __p1_338); \ +#define vcopyq_laneq_u16(__p0_338, __p1_338, __p2_338, __p3_338) __extension__ ({ \ + uint16x8_t __ret_338; \ + uint16x8_t __s0_338 = __p0_338; \ + uint16x8_t __s2_338 = __p2_338; \ + __ret_338 = vsetq_lane_u16(vgetq_lane_u16(__s2_338, __p3_338), __s0_338, __p1_338); \ __ret_338; \ }) #else -#define vcopyq_laneq_s64(__p0_339, __p1_339, __p2_339, __p3_339) __extension__ ({ \ - int64x2_t __ret_339; \ - int64x2_t __s0_339 = __p0_339; \ - int64x2_t __s2_339 = __p2_339; \ - int64x2_t __rev0_339; __rev0_339 = __builtin_shufflevector(__s0_339, __s0_339, 1, 0); \ - int64x2_t __rev2_339; __rev2_339 = __builtin_shufflevector(__s2_339, __s2_339, 1, 0); \ - __ret_339 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_339, __p3_339), __rev0_339, __p1_339); \ - __ret_339 = __builtin_shufflevector(__ret_339, __ret_339, 1, 0); \ +#define vcopyq_laneq_u16(__p0_339, __p1_339, __p2_339, __p3_339) __extension__ ({ \ + uint16x8_t __ret_339; \ + uint16x8_t __s0_339 = __p0_339; \ + uint16x8_t __s2_339 = __p2_339; \ + uint16x8_t __rev0_339; __rev0_339 = __builtin_shufflevector(__s0_339, __s0_339, __lane_reverse_128_16); \ + uint16x8_t __rev2_339; __rev2_339 = __builtin_shufflevector(__s2_339, __s2_339, __lane_reverse_128_16); \ + __ret_339 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_339, __p3_339), __rev0_339, __p1_339); \ + __ret_339 = __builtin_shufflevector(__ret_339, __ret_339, __lane_reverse_128_16); \ __ret_339; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s16(__p0_340, __p1_340, __p2_340, __p3_340) __extension__ ({ \ - int16x8_t __ret_340; \ - int16x8_t __s0_340 = __p0_340; \ - int16x8_t __s2_340 = __p2_340; \ - __ret_340 = vsetq_lane_s16(vgetq_lane_s16(__s2_340, __p3_340), __s0_340, __p1_340); \ +#define vcopyq_laneq_s8(__p0_340, __p1_340, __p2_340, __p3_340) __extension__ ({ \ + int8x16_t __ret_340; \ + int8x16_t __s0_340 = __p0_340; \ + int8x16_t __s2_340 = __p2_340; \ + __ret_340 = vsetq_lane_s8(vgetq_lane_s8(__s2_340, __p3_340), __s0_340, __p1_340); \ __ret_340; \ }) #else -#define vcopyq_laneq_s16(__p0_341, __p1_341, __p2_341, __p3_341) __extension__ ({ \ - int16x8_t __ret_341; \ - int16x8_t __s0_341 = __p0_341; \ - int16x8_t __s2_341 = __p2_341; \ - int16x8_t __rev0_341; __rev0_341 = __builtin_shufflevector(__s0_341, __s0_341, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_341; __rev2_341 = __builtin_shufflevector(__s2_341, __s2_341, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_341 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_341, __p3_341), __rev0_341, __p1_341); \ - __ret_341 = __builtin_shufflevector(__ret_341, __ret_341, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_s8(__p0_341, __p1_341, __p2_341, __p3_341) __extension__ ({ \ + int8x16_t __ret_341; \ + int8x16_t __s0_341 = __p0_341; \ + int8x16_t __s2_341 = __p2_341; \ + int8x16_t __rev0_341; __rev0_341 = __builtin_shufflevector(__s0_341, __s0_341, __lane_reverse_128_8); \ + int8x16_t __rev2_341; __rev2_341 = __builtin_shufflevector(__s2_341, __s2_341, __lane_reverse_128_8); \ + __ret_341 = 
__noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_341, __p3_341), __rev0_341, __p1_341); \ + __ret_341 = __builtin_shufflevector(__ret_341, __ret_341, __lane_reverse_128_8); \ __ret_341; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p8(__p0_342, __p1_342, __p2_342, __p3_342) __extension__ ({ \ - poly8x8_t __ret_342; \ - poly8x8_t __s0_342 = __p0_342; \ - poly8x16_t __s2_342 = __p2_342; \ - __ret_342 = vset_lane_p8(vgetq_lane_p8(__s2_342, __p3_342), __s0_342, __p1_342); \ +#define vcopyq_laneq_f32(__p0_342, __p1_342, __p2_342, __p3_342) __extension__ ({ \ + float32x4_t __ret_342; \ + float32x4_t __s0_342 = __p0_342; \ + float32x4_t __s2_342 = __p2_342; \ + __ret_342 = vsetq_lane_f32(vgetq_lane_f32(__s2_342, __p3_342), __s0_342, __p1_342); \ __ret_342; \ }) #else -#define vcopy_laneq_p8(__p0_343, __p1_343, __p2_343, __p3_343) __extension__ ({ \ - poly8x8_t __ret_343; \ - poly8x8_t __s0_343 = __p0_343; \ - poly8x16_t __s2_343 = __p2_343; \ - poly8x8_t __rev0_343; __rev0_343 = __builtin_shufflevector(__s0_343, __s0_343, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_343; __rev2_343 = __builtin_shufflevector(__s2_343, __s2_343, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_343 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_343, __p3_343), __rev0_343, __p1_343); \ - __ret_343 = __builtin_shufflevector(__ret_343, __ret_343, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_f32(__p0_343, __p1_343, __p2_343, __p3_343) __extension__ ({ \ + float32x4_t __ret_343; \ + float32x4_t __s0_343 = __p0_343; \ + float32x4_t __s2_343 = __p2_343; \ + float32x4_t __rev0_343; __rev0_343 = __builtin_shufflevector(__s0_343, __s0_343, __lane_reverse_128_32); \ + float32x4_t __rev2_343; __rev2_343 = __builtin_shufflevector(__s2_343, __s2_343, __lane_reverse_128_32); \ + __ret_343 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_343, __p3_343), __rev0_343, __p1_343); \ + __ret_343 = __builtin_shufflevector(__ret_343, __ret_343, __lane_reverse_128_32); \ __ret_343; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p16(__p0_344, __p1_344, __p2_344, __p3_344) __extension__ ({ \ - poly16x4_t __ret_344; \ - poly16x4_t __s0_344 = __p0_344; \ - poly16x8_t __s2_344 = __p2_344; \ - __ret_344 = vset_lane_p16(vgetq_lane_p16(__s2_344, __p3_344), __s0_344, __p1_344); \ +#define vcopyq_laneq_s32(__p0_344, __p1_344, __p2_344, __p3_344) __extension__ ({ \ + int32x4_t __ret_344; \ + int32x4_t __s0_344 = __p0_344; \ + int32x4_t __s2_344 = __p2_344; \ + __ret_344 = vsetq_lane_s32(vgetq_lane_s32(__s2_344, __p3_344), __s0_344, __p1_344); \ __ret_344; \ }) #else -#define vcopy_laneq_p16(__p0_345, __p1_345, __p2_345, __p3_345) __extension__ ({ \ - poly16x4_t __ret_345; \ - poly16x4_t __s0_345 = __p0_345; \ - poly16x8_t __s2_345 = __p2_345; \ - poly16x4_t __rev0_345; __rev0_345 = __builtin_shufflevector(__s0_345, __s0_345, 3, 2, 1, 0); \ - poly16x8_t __rev2_345; __rev2_345 = __builtin_shufflevector(__s2_345, __s2_345, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_345 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_345, __p3_345), __rev0_345, __p1_345); \ - __ret_345 = __builtin_shufflevector(__ret_345, __ret_345, 3, 2, 1, 0); \ +#define vcopyq_laneq_s32(__p0_345, __p1_345, __p2_345, __p3_345) __extension__ ({ \ + int32x4_t __ret_345; \ + int32x4_t __s0_345 = __p0_345; \ + int32x4_t __s2_345 = __p2_345; \ + int32x4_t __rev0_345; __rev0_345 = __builtin_shufflevector(__s0_345, __s0_345, __lane_reverse_128_32); \ + int32x4_t __rev2_345; __rev2_345 = __builtin_shufflevector(__s2_345, __s2_345, 
__lane_reverse_128_32); \ + __ret_345 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_345, __p3_345), __rev0_345, __p1_345); \ + __ret_345 = __builtin_shufflevector(__ret_345, __ret_345, __lane_reverse_128_32); \ __ret_345; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u8(__p0_346, __p1_346, __p2_346, __p3_346) __extension__ ({ \ - uint8x8_t __ret_346; \ - uint8x8_t __s0_346 = __p0_346; \ - uint8x16_t __s2_346 = __p2_346; \ - __ret_346 = vset_lane_u8(vgetq_lane_u8(__s2_346, __p3_346), __s0_346, __p1_346); \ +#define vcopyq_laneq_s64(__p0_346, __p1_346, __p2_346, __p3_346) __extension__ ({ \ + int64x2_t __ret_346; \ + int64x2_t __s0_346 = __p0_346; \ + int64x2_t __s2_346 = __p2_346; \ + __ret_346 = vsetq_lane_s64(vgetq_lane_s64(__s2_346, __p3_346), __s0_346, __p1_346); \ __ret_346; \ }) #else -#define vcopy_laneq_u8(__p0_347, __p1_347, __p2_347, __p3_347) __extension__ ({ \ - uint8x8_t __ret_347; \ - uint8x8_t __s0_347 = __p0_347; \ - uint8x16_t __s2_347 = __p2_347; \ - uint8x8_t __rev0_347; __rev0_347 = __builtin_shufflevector(__s0_347, __s0_347, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_347; __rev2_347 = __builtin_shufflevector(__s2_347, __s2_347, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_347 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_347, __p3_347), __rev0_347, __p1_347); \ - __ret_347 = __builtin_shufflevector(__ret_347, __ret_347, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopyq_laneq_s64(__p0_347, __p1_347, __p2_347, __p3_347) __extension__ ({ \ + int64x2_t __ret_347; \ + int64x2_t __s0_347 = __p0_347; \ + int64x2_t __s2_347 = __p2_347; \ + int64x2_t __rev0_347; __rev0_347 = __builtin_shufflevector(__s0_347, __s0_347, __lane_reverse_128_64); \ + int64x2_t __rev2_347; __rev2_347 = __builtin_shufflevector(__s2_347, __s2_347, __lane_reverse_128_64); \ + __ret_347 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_347, __p3_347), __rev0_347, __p1_347); \ + __ret_347 = __builtin_shufflevector(__ret_347, __ret_347, __lane_reverse_128_64); \ __ret_347; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u32(__p0_348, __p1_348, __p2_348, __p3_348) __extension__ ({ \ - uint32x2_t __ret_348; \ - uint32x2_t __s0_348 = __p0_348; \ - uint32x4_t __s2_348 = __p2_348; \ - __ret_348 = vset_lane_u32(vgetq_lane_u32(__s2_348, __p3_348), __s0_348, __p1_348); \ +#define vcopyq_laneq_mf8(__p0_348, __p1_348, __p2_348, __p3_348) __extension__ ({ \ + mfloat8x16_t __ret_348; \ + mfloat8x16_t __s0_348 = __p0_348; \ + mfloat8x16_t __s2_348 = __p2_348; \ + __ret_348 = vsetq_lane_mf8(vgetq_lane_mf8(__s2_348, __p3_348), __s0_348, __p1_348); \ __ret_348; \ }) #else -#define vcopy_laneq_u32(__p0_349, __p1_349, __p2_349, __p3_349) __extension__ ({ \ - uint32x2_t __ret_349; \ - uint32x2_t __s0_349 = __p0_349; \ - uint32x4_t __s2_349 = __p2_349; \ - uint32x2_t __rev0_349; __rev0_349 = __builtin_shufflevector(__s0_349, __s0_349, 1, 0); \ - uint32x4_t __rev2_349; __rev2_349 = __builtin_shufflevector(__s2_349, __s2_349, 3, 2, 1, 0); \ - __ret_349 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_349, __p3_349), __rev0_349, __p1_349); \ - __ret_349 = __builtin_shufflevector(__ret_349, __ret_349, 1, 0); \ +#define vcopyq_laneq_mf8(__p0_349, __p1_349, __p2_349, __p3_349) __extension__ ({ \ + mfloat8x16_t __ret_349; \ + mfloat8x16_t __s0_349 = __p0_349; \ + mfloat8x16_t __s2_349 = __p2_349; \ + mfloat8x16_t __rev0_349; __rev0_349 = __builtin_shufflevector(__s0_349, __s0_349, __lane_reverse_128_8); \ + mfloat8x16_t __rev2_349; __rev2_349 = 
__builtin_shufflevector(__s2_349, __s2_349, __lane_reverse_128_8); \ + __ret_349 = __noswap_vsetq_lane_mf8(__noswap_vgetq_lane_mf8(__rev2_349, __p3_349), __rev0_349, __p1_349); \ + __ret_349 = __builtin_shufflevector(__ret_349, __ret_349, __lane_reverse_128_8); \ __ret_349; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u64(__p0_350, __p1_350, __p2_350, __p3_350) __extension__ ({ \ - uint64x1_t __ret_350; \ - uint64x1_t __s0_350 = __p0_350; \ - uint64x2_t __s2_350 = __p2_350; \ - __ret_350 = vset_lane_u64(vgetq_lane_u64(__s2_350, __p3_350), __s0_350, __p1_350); \ +#define vcopyq_laneq_s16(__p0_350, __p1_350, __p2_350, __p3_350) __extension__ ({ \ + int16x8_t __ret_350; \ + int16x8_t __s0_350 = __p0_350; \ + int16x8_t __s2_350 = __p2_350; \ + __ret_350 = vsetq_lane_s16(vgetq_lane_s16(__s2_350, __p3_350), __s0_350, __p1_350); \ __ret_350; \ }) #else -#define vcopy_laneq_u64(__p0_351, __p1_351, __p2_351, __p3_351) __extension__ ({ \ - uint64x1_t __ret_351; \ - uint64x1_t __s0_351 = __p0_351; \ - uint64x2_t __s2_351 = __p2_351; \ - uint64x2_t __rev2_351; __rev2_351 = __builtin_shufflevector(__s2_351, __s2_351, 1, 0); \ - __ret_351 = vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_351, __p3_351), __s0_351, __p1_351); \ +#define vcopyq_laneq_s16(__p0_351, __p1_351, __p2_351, __p3_351) __extension__ ({ \ + int16x8_t __ret_351; \ + int16x8_t __s0_351 = __p0_351; \ + int16x8_t __s2_351 = __p2_351; \ + int16x8_t __rev0_351; __rev0_351 = __builtin_shufflevector(__s0_351, __s0_351, __lane_reverse_128_16); \ + int16x8_t __rev2_351; __rev2_351 = __builtin_shufflevector(__s2_351, __s2_351, __lane_reverse_128_16); \ + __ret_351 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_351, __p3_351), __rev0_351, __p1_351); \ + __ret_351 = __builtin_shufflevector(__ret_351, __ret_351, __lane_reverse_128_16); \ __ret_351; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u16(__p0_352, __p1_352, __p2_352, __p3_352) __extension__ ({ \ - uint16x4_t __ret_352; \ - uint16x4_t __s0_352 = __p0_352; \ - uint16x8_t __s2_352 = __p2_352; \ - __ret_352 = vset_lane_u16(vgetq_lane_u16(__s2_352, __p3_352), __s0_352, __p1_352); \ +#define vcopy_laneq_p8(__p0_352, __p1_352, __p2_352, __p3_352) __extension__ ({ \ + poly8x8_t __ret_352; \ + poly8x8_t __s0_352 = __p0_352; \ + poly8x16_t __s2_352 = __p2_352; \ + __ret_352 = vset_lane_p8(vgetq_lane_p8(__s2_352, __p3_352), __s0_352, __p1_352); \ __ret_352; \ }) #else -#define vcopy_laneq_u16(__p0_353, __p1_353, __p2_353, __p3_353) __extension__ ({ \ - uint16x4_t __ret_353; \ - uint16x4_t __s0_353 = __p0_353; \ - uint16x8_t __s2_353 = __p2_353; \ - uint16x4_t __rev0_353; __rev0_353 = __builtin_shufflevector(__s0_353, __s0_353, 3, 2, 1, 0); \ - uint16x8_t __rev2_353; __rev2_353 = __builtin_shufflevector(__s2_353, __s2_353, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_353 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_353, __p3_353), __rev0_353, __p1_353); \ - __ret_353 = __builtin_shufflevector(__ret_353, __ret_353, 3, 2, 1, 0); \ +#define vcopy_laneq_p8(__p0_353, __p1_353, __p2_353, __p3_353) __extension__ ({ \ + poly8x8_t __ret_353; \ + poly8x8_t __s0_353 = __p0_353; \ + poly8x16_t __s2_353 = __p2_353; \ + poly8x8_t __rev0_353; __rev0_353 = __builtin_shufflevector(__s0_353, __s0_353, __lane_reverse_64_8); \ + poly8x16_t __rev2_353; __rev2_353 = __builtin_shufflevector(__s2_353, __s2_353, __lane_reverse_128_8); \ + __ret_353 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_353, __p3_353), __rev0_353, __p1_353); \ + __ret_353 = __builtin_shufflevector(__ret_353, 
__ret_353, __lane_reverse_64_8); \ __ret_353; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s8(__p0_354, __p1_354, __p2_354, __p3_354) __extension__ ({ \ - int8x8_t __ret_354; \ - int8x8_t __s0_354 = __p0_354; \ - int8x16_t __s2_354 = __p2_354; \ - __ret_354 = vset_lane_s8(vgetq_lane_s8(__s2_354, __p3_354), __s0_354, __p1_354); \ +#define vcopy_laneq_p16(__p0_354, __p1_354, __p2_354, __p3_354) __extension__ ({ \ + poly16x4_t __ret_354; \ + poly16x4_t __s0_354 = __p0_354; \ + poly16x8_t __s2_354 = __p2_354; \ + __ret_354 = vset_lane_p16(vgetq_lane_p16(__s2_354, __p3_354), __s0_354, __p1_354); \ __ret_354; \ }) #else -#define vcopy_laneq_s8(__p0_355, __p1_355, __p2_355, __p3_355) __extension__ ({ \ - int8x8_t __ret_355; \ - int8x8_t __s0_355 = __p0_355; \ - int8x16_t __s2_355 = __p2_355; \ - int8x8_t __rev0_355; __rev0_355 = __builtin_shufflevector(__s0_355, __s0_355, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_355; __rev2_355 = __builtin_shufflevector(__s2_355, __s2_355, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_355 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_355, __p3_355), __rev0_355, __p1_355); \ - __ret_355 = __builtin_shufflevector(__ret_355, __ret_355, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vcopy_laneq_p16(__p0_355, __p1_355, __p2_355, __p3_355) __extension__ ({ \ + poly16x4_t __ret_355; \ + poly16x4_t __s0_355 = __p0_355; \ + poly16x8_t __s2_355 = __p2_355; \ + poly16x4_t __rev0_355; __rev0_355 = __builtin_shufflevector(__s0_355, __s0_355, __lane_reverse_64_16); \ + poly16x8_t __rev2_355; __rev2_355 = __builtin_shufflevector(__s2_355, __s2_355, __lane_reverse_128_16); \ + __ret_355 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_355, __p3_355), __rev0_355, __p1_355); \ + __ret_355 = __builtin_shufflevector(__ret_355, __ret_355, __lane_reverse_64_16); \ __ret_355; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f32(__p0_356, __p1_356, __p2_356, __p3_356) __extension__ ({ \ - float32x2_t __ret_356; \ - float32x2_t __s0_356 = __p0_356; \ - float32x4_t __s2_356 = __p2_356; \ - __ret_356 = vset_lane_f32(vgetq_lane_f32(__s2_356, __p3_356), __s0_356, __p1_356); \ +#define vcopy_laneq_u8(__p0_356, __p1_356, __p2_356, __p3_356) __extension__ ({ \ + uint8x8_t __ret_356; \ + uint8x8_t __s0_356 = __p0_356; \ + uint8x16_t __s2_356 = __p2_356; \ + __ret_356 = vset_lane_u8(vgetq_lane_u8(__s2_356, __p3_356), __s0_356, __p1_356); \ __ret_356; \ }) #else -#define vcopy_laneq_f32(__p0_357, __p1_357, __p2_357, __p3_357) __extension__ ({ \ - float32x2_t __ret_357; \ - float32x2_t __s0_357 = __p0_357; \ - float32x4_t __s2_357 = __p2_357; \ - float32x2_t __rev0_357; __rev0_357 = __builtin_shufflevector(__s0_357, __s0_357, 1, 0); \ - float32x4_t __rev2_357; __rev2_357 = __builtin_shufflevector(__s2_357, __s2_357, 3, 2, 1, 0); \ - __ret_357 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_357, __p3_357), __rev0_357, __p1_357); \ - __ret_357 = __builtin_shufflevector(__ret_357, __ret_357, 1, 0); \ +#define vcopy_laneq_u8(__p0_357, __p1_357, __p2_357, __p3_357) __extension__ ({ \ + uint8x8_t __ret_357; \ + uint8x8_t __s0_357 = __p0_357; \ + uint8x16_t __s2_357 = __p2_357; \ + uint8x8_t __rev0_357; __rev0_357 = __builtin_shufflevector(__s0_357, __s0_357, __lane_reverse_64_8); \ + uint8x16_t __rev2_357; __rev2_357 = __builtin_shufflevector(__s2_357, __s2_357, __lane_reverse_128_8); \ + __ret_357 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_357, __p3_357), __rev0_357, __p1_357); \ + __ret_357 = __builtin_shufflevector(__ret_357, 
__ret_357, __lane_reverse_64_8); \ __ret_357; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s32(__p0_358, __p1_358, __p2_358, __p3_358) __extension__ ({ \ - int32x2_t __ret_358; \ - int32x2_t __s0_358 = __p0_358; \ - int32x4_t __s2_358 = __p2_358; \ - __ret_358 = vset_lane_s32(vgetq_lane_s32(__s2_358, __p3_358), __s0_358, __p1_358); \ +#define vcopy_laneq_u32(__p0_358, __p1_358, __p2_358, __p3_358) __extension__ ({ \ + uint32x2_t __ret_358; \ + uint32x2_t __s0_358 = __p0_358; \ + uint32x4_t __s2_358 = __p2_358; \ + __ret_358 = vset_lane_u32(vgetq_lane_u32(__s2_358, __p3_358), __s0_358, __p1_358); \ __ret_358; \ }) #else -#define vcopy_laneq_s32(__p0_359, __p1_359, __p2_359, __p3_359) __extension__ ({ \ - int32x2_t __ret_359; \ - int32x2_t __s0_359 = __p0_359; \ - int32x4_t __s2_359 = __p2_359; \ - int32x2_t __rev0_359; __rev0_359 = __builtin_shufflevector(__s0_359, __s0_359, 1, 0); \ - int32x4_t __rev2_359; __rev2_359 = __builtin_shufflevector(__s2_359, __s2_359, 3, 2, 1, 0); \ - __ret_359 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_359, __p3_359), __rev0_359, __p1_359); \ - __ret_359 = __builtin_shufflevector(__ret_359, __ret_359, 1, 0); \ +#define vcopy_laneq_u32(__p0_359, __p1_359, __p2_359, __p3_359) __extension__ ({ \ + uint32x2_t __ret_359; \ + uint32x2_t __s0_359 = __p0_359; \ + uint32x4_t __s2_359 = __p2_359; \ + uint32x2_t __rev0_359; __rev0_359 = __builtin_shufflevector(__s0_359, __s0_359, __lane_reverse_64_32); \ + uint32x4_t __rev2_359; __rev2_359 = __builtin_shufflevector(__s2_359, __s2_359, __lane_reverse_128_32); \ + __ret_359 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_359, __p3_359), __rev0_359, __p1_359); \ + __ret_359 = __builtin_shufflevector(__ret_359, __ret_359, __lane_reverse_64_32); \ __ret_359; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s64(__p0_360, __p1_360, __p2_360, __p3_360) __extension__ ({ \ - int64x1_t __ret_360; \ - int64x1_t __s0_360 = __p0_360; \ - int64x2_t __s2_360 = __p2_360; \ - __ret_360 = vset_lane_s64(vgetq_lane_s64(__s2_360, __p3_360), __s0_360, __p1_360); \ +#define vcopy_laneq_u64(__p0_360, __p1_360, __p2_360, __p3_360) __extension__ ({ \ + uint64x1_t __ret_360; \ + uint64x1_t __s0_360 = __p0_360; \ + uint64x2_t __s2_360 = __p2_360; \ + __ret_360 = vset_lane_u64(vgetq_lane_u64(__s2_360, __p3_360), __s0_360, __p1_360); \ __ret_360; \ }) #else -#define vcopy_laneq_s64(__p0_361, __p1_361, __p2_361, __p3_361) __extension__ ({ \ - int64x1_t __ret_361; \ - int64x1_t __s0_361 = __p0_361; \ - int64x2_t __s2_361 = __p2_361; \ - int64x2_t __rev2_361; __rev2_361 = __builtin_shufflevector(__s2_361, __s2_361, 1, 0); \ - __ret_361 = vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_361, __p3_361), __s0_361, __p1_361); \ +#define vcopy_laneq_u64(__p0_361, __p1_361, __p2_361, __p3_361) __extension__ ({ \ + uint64x1_t __ret_361; \ + uint64x1_t __s0_361 = __p0_361; \ + uint64x2_t __s2_361 = __p2_361; \ + uint64x2_t __rev2_361; __rev2_361 = __builtin_shufflevector(__s2_361, __s2_361, __lane_reverse_128_64); \ + __ret_361 = vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_361, __p3_361), __s0_361, __p1_361); \ __ret_361; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s16(__p0_362, __p1_362, __p2_362, __p3_362) __extension__ ({ \ - int16x4_t __ret_362; \ - int16x4_t __s0_362 = __p0_362; \ - int16x8_t __s2_362 = __p2_362; \ - __ret_362 = vset_lane_s16(vgetq_lane_s16(__s2_362, __p3_362), __s0_362, __p1_362); \ +#define vcopy_laneq_u16(__p0_362, __p1_362, __p2_362, __p3_362) __extension__ ({ \ + uint16x4_t 
__ret_362; \ + uint16x4_t __s0_362 = __p0_362; \ + uint16x8_t __s2_362 = __p2_362; \ + __ret_362 = vset_lane_u16(vgetq_lane_u16(__s2_362, __p3_362), __s0_362, __p1_362); \ __ret_362; \ }) #else -#define vcopy_laneq_s16(__p0_363, __p1_363, __p2_363, __p3_363) __extension__ ({ \ - int16x4_t __ret_363; \ - int16x4_t __s0_363 = __p0_363; \ - int16x8_t __s2_363 = __p2_363; \ - int16x4_t __rev0_363; __rev0_363 = __builtin_shufflevector(__s0_363, __s0_363, 3, 2, 1, 0); \ - int16x8_t __rev2_363; __rev2_363 = __builtin_shufflevector(__s2_363, __s2_363, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_363 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_363, __p3_363), __rev0_363, __p1_363); \ - __ret_363 = __builtin_shufflevector(__ret_363, __ret_363, 3, 2, 1, 0); \ +#define vcopy_laneq_u16(__p0_363, __p1_363, __p2_363, __p3_363) __extension__ ({ \ + uint16x4_t __ret_363; \ + uint16x4_t __s0_363 = __p0_363; \ + uint16x8_t __s2_363 = __p2_363; \ + uint16x4_t __rev0_363; __rev0_363 = __builtin_shufflevector(__s0_363, __s0_363, __lane_reverse_64_16); \ + uint16x8_t __rev2_363; __rev2_363 = __builtin_shufflevector(__s2_363, __s2_363, __lane_reverse_128_16); \ + __ret_363 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_363, __p3_363), __rev0_363, __p1_363); \ + __ret_363 = __builtin_shufflevector(__ret_363, __ret_363, __lane_reverse_64_16); \ __ret_363; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vcopy_laneq_s8(__p0_364, __p1_364, __p2_364, __p3_364) __extension__ ({ \ + int8x8_t __ret_364; \ + int8x8_t __s0_364 = __p0_364; \ + int8x16_t __s2_364 = __p2_364; \ + __ret_364 = vset_lane_s8(vgetq_lane_s8(__s2_364, __p3_364), __s0_364, __p1_364); \ + __ret_364; \ +}) +#else +#define vcopy_laneq_s8(__p0_365, __p1_365, __p2_365, __p3_365) __extension__ ({ \ + int8x8_t __ret_365; \ + int8x8_t __s0_365 = __p0_365; \ + int8x16_t __s2_365 = __p2_365; \ + int8x8_t __rev0_365; __rev0_365 = __builtin_shufflevector(__s0_365, __s0_365, __lane_reverse_64_8); \ + int8x16_t __rev2_365; __rev2_365 = __builtin_shufflevector(__s2_365, __s2_365, __lane_reverse_128_8); \ + __ret_365 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_365, __p3_365), __rev0_365, __p1_365); \ + __ret_365 = __builtin_shufflevector(__ret_365, __ret_365, __lane_reverse_64_8); \ + __ret_365; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_laneq_f32(__p0_366, __p1_366, __p2_366, __p3_366) __extension__ ({ \ + float32x2_t __ret_366; \ + float32x2_t __s0_366 = __p0_366; \ + float32x4_t __s2_366 = __p2_366; \ + __ret_366 = vset_lane_f32(vgetq_lane_f32(__s2_366, __p3_366), __s0_366, __p1_366); \ + __ret_366; \ +}) +#else +#define vcopy_laneq_f32(__p0_367, __p1_367, __p2_367, __p3_367) __extension__ ({ \ + float32x2_t __ret_367; \ + float32x2_t __s0_367 = __p0_367; \ + float32x4_t __s2_367 = __p2_367; \ + float32x2_t __rev0_367; __rev0_367 = __builtin_shufflevector(__s0_367, __s0_367, __lane_reverse_64_32); \ + float32x4_t __rev2_367; __rev2_367 = __builtin_shufflevector(__s2_367, __s2_367, __lane_reverse_128_32); \ + __ret_367 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_367, __p3_367), __rev0_367, __p1_367); \ + __ret_367 = __builtin_shufflevector(__ret_367, __ret_367, __lane_reverse_64_32); \ + __ret_367; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_laneq_s32(__p0_368, __p1_368, __p2_368, __p3_368) __extension__ ({ \ + int32x2_t __ret_368; \ + int32x2_t __s0_368 = __p0_368; \ + int32x4_t __s2_368 = __p2_368; \ + __ret_368 = vset_lane_s32(vgetq_lane_s32(__s2_368, __p3_368), __s0_368, __p1_368); \ + __ret_368; \ +}) 
+#else +#define vcopy_laneq_s32(__p0_369, __p1_369, __p2_369, __p3_369) __extension__ ({ \ + int32x2_t __ret_369; \ + int32x2_t __s0_369 = __p0_369; \ + int32x4_t __s2_369 = __p2_369; \ + int32x2_t __rev0_369; __rev0_369 = __builtin_shufflevector(__s0_369, __s0_369, __lane_reverse_64_32); \ + int32x4_t __rev2_369; __rev2_369 = __builtin_shufflevector(__s2_369, __s2_369, __lane_reverse_128_32); \ + __ret_369 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_369, __p3_369), __rev0_369, __p1_369); \ + __ret_369 = __builtin_shufflevector(__ret_369, __ret_369, __lane_reverse_64_32); \ + __ret_369; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_laneq_s64(__p0_370, __p1_370, __p2_370, __p3_370) __extension__ ({ \ + int64x1_t __ret_370; \ + int64x1_t __s0_370 = __p0_370; \ + int64x2_t __s2_370 = __p2_370; \ + __ret_370 = vset_lane_s64(vgetq_lane_s64(__s2_370, __p3_370), __s0_370, __p1_370); \ + __ret_370; \ +}) +#else +#define vcopy_laneq_s64(__p0_371, __p1_371, __p2_371, __p3_371) __extension__ ({ \ + int64x1_t __ret_371; \ + int64x1_t __s0_371 = __p0_371; \ + int64x2_t __s2_371 = __p2_371; \ + int64x2_t __rev2_371; __rev2_371 = __builtin_shufflevector(__s2_371, __s2_371, __lane_reverse_128_64); \ + __ret_371 = vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_371, __p3_371), __s0_371, __p1_371); \ + __ret_371; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_laneq_mf8(__p0_372, __p1_372, __p2_372, __p3_372) __extension__ ({ \ + mfloat8x8_t __ret_372; \ + mfloat8x8_t __s0_372 = __p0_372; \ + mfloat8x16_t __s2_372 = __p2_372; \ + __ret_372 = vset_lane_mf8(vgetq_lane_mf8(__s2_372, __p3_372), __s0_372, __p1_372); \ + __ret_372; \ +}) +#else +#define vcopy_laneq_mf8(__p0_373, __p1_373, __p2_373, __p3_373) __extension__ ({ \ + mfloat8x8_t __ret_373; \ + mfloat8x8_t __s0_373 = __p0_373; \ + mfloat8x16_t __s2_373 = __p2_373; \ + mfloat8x8_t __rev0_373; __rev0_373 = __builtin_shufflevector(__s0_373, __s0_373, __lane_reverse_64_8); \ + mfloat8x16_t __rev2_373; __rev2_373 = __builtin_shufflevector(__s2_373, __s2_373, __lane_reverse_128_8); \ + __ret_373 = __noswap_vset_lane_mf8(__noswap_vgetq_lane_mf8(__rev2_373, __p3_373), __rev0_373, __p1_373); \ + __ret_373 = __builtin_shufflevector(__ret_373, __ret_373, __lane_reverse_64_8); \ + __ret_373; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcopy_laneq_s16(__p0_374, __p1_374, __p2_374, __p3_374) __extension__ ({ \ + int16x4_t __ret_374; \ + int16x4_t __s0_374 = __p0_374; \ + int16x8_t __s2_374 = __p2_374; \ + __ret_374 = vset_lane_s16(vgetq_lane_s16(__s2_374, __p3_374), __s0_374, __p1_374); \ + __ret_374; \ +}) +#else +#define vcopy_laneq_s16(__p0_375, __p1_375, __p2_375, __p3_375) __extension__ ({ \ + int16x4_t __ret_375; \ + int16x4_t __s0_375 = __p0_375; \ + int16x8_t __s2_375 = __p2_375; \ + int16x4_t __rev0_375; __rev0_375 = __builtin_shufflevector(__s0_375, __s0_375, __lane_reverse_64_16); \ + int16x8_t __rev2_375; __rev2_375 = __builtin_shufflevector(__s2_375, __s2_375, __lane_reverse_128_16); \ + __ret_375 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_375, __p3_375), __rev0_375, __p1_375); \ + __ret_375 = __builtin_shufflevector(__ret_375, __ret_375, __lane_reverse_64_16); \ + __ret_375; \ +}) +#endif + #define vcreate_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ uint64_t __promote = __p0; \ - __ret = (poly64x1_t)(__promote); \ + __ret = __builtin_bit_cast(poly64x1_t, __promote); \ __ret; \ }) #define vcreate_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ uint64_t __promote = __p0; \ - __ret = 
(float64x1_t)(__promote); \ + __ret = __builtin_bit_cast(float64x1_t, __promote); \ __ret; \ }) __ai __attribute__((target("neon"))) float32_t vcvts_f32_s32(int32_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_s32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vcvts_f32_s32(__p0)); return __ret; } __ai __attribute__((target("neon"))) float32_t vcvts_f32_u32(uint32_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvts_f32_u32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vcvts_f32_u32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_f64(float64x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_f32_f64(__builtin_bit_cast(int8x16_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vcvt_f32_f64(float64x2_t __p0) { float32x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_f32_f64(__builtin_bit_cast(int8x16_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) float32x2_t __noswap_vcvt_f32_f64(float64x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvt_f32_f64((int8x16_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvt_f32_f64(__builtin_bit_cast(int8x16_t, __p0), 9)); return __ret; } #endif __ai __attribute__((target("neon"))) float64_t vcvtd_f64_s64(int64_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_s64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vcvtd_f64_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) float64_t vcvtd_f64_u64(uint64_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vcvtd_f64_u64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vcvtd_f64_u64(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_f64_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vcvtq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_f64_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -48564,46 +49588,46 @@ __ai __attribute__((target("neon"))) float64x2_t vcvtq_f64_u64(uint64x2_t __p0) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vcvtq_f64_s64(int64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) 
__builtin_neon_vcvtq_f64_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_f64_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vcvtq_f64_s64(int64x2_t __p0) { float64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcvtq_f64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_f64_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vcvt_f64_u64(uint64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vcvt_f64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vcvt_f64_s64(int64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcvt_f64_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vcvt_f64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vcvt_f64_f32(float32x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvt_f64_f32(__builtin_bit_cast(int8x8_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vcvt_f64_f32(float32x2_t __p0) { float64x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvt_f64_f32(__builtin_bit_cast(int8x8_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) float64x2_t __noswap_vcvt_f64_f32(float32x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcvt_f64_f32((int8x8_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvt_f64_f32(__builtin_bit_cast(int8x8_t, __p0), 42)); return __ret; } #endif @@ -48617,10 +49641,10 @@ __ai __attribute__((target("neon"))) float16x8_t vcvt_high_f16_f32(float16x4_t _ #else __ai __attribute__((target("neon"))) float16x8_t vcvt_high_f16_f32(float16x4_t __p0, float32x4_t __p1) { float16x8_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vcombine_f16(__rev0, __noswap_vcvt_f16_f32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -48634,9 +49658,9 @@ __ai __attribute__((target("neon"))) float32x4_t vcvt_high_f32_f16(float16x8_t _ #else __ai 
__attribute__((target("neon"))) float32x4_t vcvt_high_f32_f16(float16x8_t __p0) { float32x4_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __noswap_vcvt_f32_f16(__noswap_vget_high_f16(__rev0)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -48650,10 +49674,10 @@ __ai __attribute__((target("neon"))) float32x4_t vcvt_high_f32_f64(float32x2_t _ #else __ai __attribute__((target("neon"))) float32x4_t vcvt_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { float32x4_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvt_f32_f64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -48667,9 +49691,9 @@ __ai __attribute__((target("neon"))) float64x2_t vcvt_high_f64_f32(float32x4_t _ #else __ai __attribute__((target("neon"))) float64x2_t vcvt_high_f64_f32(float32x4_t __p0) { float64x2_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __noswap_vcvt_f64_f32(__noswap_vget_high_f32(__rev0)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -48677,29 +49701,29 @@ __ai __attribute__((target("neon"))) float64x2_t vcvt_high_f64_f32(float32x4_t _ #define vcvts_n_f32_u32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ uint32_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_u32(__s0, __p1); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vcvts_n_f32_u32(__s0, __p1)); \ __ret; \ }) #define vcvts_n_f32_s32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ int32_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vcvts_n_f32_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vcvts_n_f32_s32(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_n_f64_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define vcvtq_n_f64_u64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_n_f64_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -48708,16 +49732,16 @@ __ai 
__attribute__((target("neon"))) float64x2_t vcvt_high_f64_f32(float32x4_t _ #define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ int64x2_t __s0 = __p0; \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_n_f64_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define vcvtq_n_f64_s64(__p0, __p1) __extension__ ({ \ float64x2_t __ret; \ int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_vcvtq_n_f64_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcvtq_n_f64_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -48725,47 +49749,47 @@ __ai __attribute__((target("neon"))) float64x2_t vcvt_high_f64_f32(float32x4_t _ #define vcvt_n_f64_u64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vcvt_n_f64_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #define vcvt_n_f64_s64(__p0, __p1) __extension__ ({ \ float64x1_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (float64x1_t) __builtin_neon_vcvt_n_f64_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vcvt_n_f64_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #define vcvtd_n_f64_u64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vcvtd_n_f64_u64(__s0, __p1)); \ __ret; \ }) #define vcvtd_n_f64_s64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ int64_t __s0 = __p0; \ - __ret = (float64_t) __builtin_neon_vcvtd_n_f64_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vcvtd_n_f64_s64(__s0, __p1)); \ __ret; \ }) #define vcvts_n_s32_f32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ float32_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vcvts_n_s32_f32(__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvts_n_s32_f32(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__s0, __p1, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtq_n_s64_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 35)); \ __ret; \ }) #else #define vcvtq_n_s64_f64(__p0, __p1) __extension__ ({ \ int64x2_t __ret; \ float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vcvtq_n_s64_v((int8x16_t)__rev0, __p1, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtq_n_s64_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -48773,35 +49797,35 @@ __ai 
__attribute__((target("neon"))) float64x2_t vcvt_high_f64_f32(float32x4_t _ #define vcvt_n_s64_f64(__p0, __p1) __extension__ ({ \ int64x1_t __ret; \ float64x1_t __s0 = __p0; \ - __ret = (int64x1_t) __builtin_neon_vcvt_n_s64_v((int8x8_t)__s0, __p1, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvt_n_s64_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 3)); \ __ret; \ }) #define vcvtd_n_s64_f64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ float64_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vcvtd_n_s64_f64(__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtd_n_s64_f64(__s0, __p1)); \ __ret; \ }) #define vcvts_n_u32_f32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ float32_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vcvts_n_u32_f32(__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvts_n_u32_f32(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__s0, __p1, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtq_n_u64_v(__builtin_bit_cast(int8x16_t, __s0), __p1, 51)); \ __ret; \ }) #else #define vcvtq_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64x2_t __ret; \ float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vcvtq_n_u64_v((int8x16_t)__rev0, __p1, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtq_n_u64_v(__builtin_bit_cast(int8x16_t, __rev0), __p1, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -48809,347 +49833,347 @@ __ai __attribute__((target("neon"))) float64x2_t vcvt_high_f64_f32(float32x4_t _ #define vcvt_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64x1_t __ret; \ float64x1_t __s0 = __p0; \ - __ret = (uint64x1_t) __builtin_neon_vcvt_n_u64_v((int8x8_t)__s0, __p1, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvt_n_u64_v(__builtin_bit_cast(int8x8_t, __s0), __p1, 19)); \ __ret; \ }) #define vcvtd_n_u64_f64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ float64_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vcvtd_n_u64_f64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtd_n_u64_f64(__s0, __p1)); \ __ret; \ }) __ai __attribute__((target("neon"))) int32_t vcvts_s32_f32(float32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvts_s32_f32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvts_s32_f32(__p0)); return __ret; } __ai __attribute__((target("neon"))) int64_t vcvtd_s64_f64(float64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtd_s64_f64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtd_s64_f64(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtq_s64_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vcvtq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = 
(int64x2_t) __builtin_neon_vcvtq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtq_s64_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vcvt_s64_f64(float64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvt_s64_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvt_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcvts_u32_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvts_u32_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvts_u32_f32(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcvtd_u64_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtd_u64_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtd_u64_f64(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtq_u64_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcvtq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcvtq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtq_u64_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcvt_u64_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvt_u64_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvt_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } __ai __attribute__((target("neon"))) int32_t vcvtas_s32_f32(float32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtas_s32_f32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtas_s32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtaq_s64_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vcvtaq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vcvtaq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtaq_s64_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vcvta_s64_f64(float64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvta_s64_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvta_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai __attribute__((target("neon"))) int64_t vcvtad_s64_f64(float64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtad_s64_f64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtad_s64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcvtas_u32_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtas_u32_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtas_u32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtaq_u64_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcvtaq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcvtaq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtaq_u64_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcvta_u64_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvta_u64_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvta_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcvtad_u64_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtad_u64_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtad_u64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) int32_t vcvtms_s32_f32(float32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtms_s32_f32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtms_s32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtmq_s64_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vcvtmq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vcvtmq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtmq_s64_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vcvtm_s64_f64(float64x1_t 
__p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtm_s64_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtm_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai __attribute__((target("neon"))) int64_t vcvtmd_s64_f64(float64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtmd_s64_f64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtmd_s64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcvtms_u32_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtms_u32_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtms_u32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtmq_u64_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtmq_u64_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcvtmq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcvtmq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtmq_u64_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcvtm_u64_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtm_u64_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtm_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcvtmd_u64_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtmd_u64_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtmd_u64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) int32_t vcvtns_s32_f32(float32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtns_s32_f32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtns_s32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtnq_s64_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vcvtnq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vcvtnq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtnq_s64_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vcvtn_s64_f64(float64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtn_s64_v((int8x8_t)__p0, 3); + __ret = 
__builtin_bit_cast(int64x1_t, __builtin_neon_vcvtn_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai __attribute__((target("neon"))) int64_t vcvtnd_s64_f64(float64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtnd_s64_f64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtnd_s64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcvtns_u32_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtns_u32_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtns_u32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtnq_u64_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcvtnq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcvtnq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtnq_u64_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcvtn_u64_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtn_u64_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtn_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcvtnd_u64_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtnd_u64_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtnd_u64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) int32_t vcvtps_s32_f32(float32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vcvtps_s32_f32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vcvtps_s32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtpq_s64_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vcvtpq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vcvtpq_s64_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vcvtpq_s64_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vcvtp_s64_f64(float64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vcvtp_s64_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vcvtp_s64_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai 
__attribute__((target("neon"))) int64_t vcvtpd_s64_f64(float64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vcvtpd_s64_f64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vcvtpd_s64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vcvtps_u32_f32(float32_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vcvtps_u32_f32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vcvtps_u32_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__p0, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtpq_u64_v(__builtin_bit_cast(int8x16_t, __p0), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vcvtpq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vcvtpq_u64_v((int8x16_t)__rev0, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vcvtpq_u64_v(__builtin_bit_cast(int8x16_t, __rev0), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vcvtp_u64_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vcvtp_u64_v((int8x8_t)__p0, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vcvtp_u64_v(__builtin_bit_cast(int8x8_t, __p0), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vcvtpd_u64_f64(float64_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vcvtpd_u64_f64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vcvtpd_u64_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) float32_t vcvtxd_f32_f64(float64_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vcvtxd_f32_f64(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vcvtxd_f32_f64(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vcvtx_f32_f64(float64x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvtx_f32_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vcvtx_f32_f64(float64x2_t __p0) { float32x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvtx_f32_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) float32x2_t __noswap_vcvtx_f32_f64(float64x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vcvtx_f32_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vcvtx_f32_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #endif @@ -49163,10 +50187,10 @@ __ai __attribute__((target("neon"))) float32x4_t 
vcvtx_high_f32_f64(float32x2_t #else __ai __attribute__((target("neon"))) float32x4_t vcvtx_high_f32_f64(float32x2_t __p0, float64x2_t __p1) { float32x4_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vcombine_f32(__rev0, __noswap_vcvtx_f32_f64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -49180,10 +50204,10 @@ __ai __attribute__((target("neon"))) float64x2_t vdivq_f64(float64x2_t __p0, flo #else __ai __attribute__((target("neon"))) float64x2_t vdivq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -49197,10 +50221,10 @@ __ai __attribute__((target("neon"))) float32x4_t vdivq_f32(float32x4_t __p0, flo #else __ai __attribute__((target("neon"))) float32x4_t vdivq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -49219,10 +50243,10 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #else __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 / __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -49231,15 +50255,15 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - __ret = (poly8_t) __builtin_neon_vdupb_lane_i8((poly8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__s0, __p1)); \ __ret; \ }) #else #define vdupb_lane_p8(__p0, __p1) __extension__ ({ \ poly8_t __ret; \ poly8x8_t __s0 = __p0; \ - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8_t) 
__builtin_neon_vdupb_lane_i8((poly8x8_t)__rev0, __p1); \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_lane_i8(__rev0, __p1)); \ __ret; \ }) #endif @@ -49248,15 +50272,15 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vduph_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - __ret = (poly16_t) __builtin_neon_vduph_lane_i16((poly16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__s0, __p1)); \ __ret; \ }) #else #define vduph_lane_p16(__p0, __p1) __extension__ ({ \ poly16_t __ret; \ poly16x4_t __s0 = __p0; \ - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (poly16_t) __builtin_neon_vduph_lane_i16((poly16x4_t)__rev0, __p1); \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_lane_i16(__rev0, __p1)); \ __ret; \ }) #endif @@ -49265,15 +50289,15 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #else #define vdupb_lane_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8x8_t __s0 = __p0; \ - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -49282,15 +50306,15 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdups_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vdups_lane_i32(__builtin_bit_cast(int32x2_t, __s0), __p1)); \ __ret; \ }) #else #define vdups_lane_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32x2_t __s0 = __p0; \ - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__rev0, __p1); \ + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vdups_lane_i32(__builtin_bit_cast(int32x2_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -49298,22 +50322,22 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupd_lane_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64x1_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vdupd_lane_i64((int64x1_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vdupd_lane_i64(__builtin_bit_cast(int64x1_t, __s0), __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vduph_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, 
__builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #else #define vduph_lane_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16x4_t __s0 = __p0; \ - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__rev0, __p1); \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -49322,15 +50346,15 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __s0), __p1)); \ __ret; \ }) #else #define vdupb_lane_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8x8_t __s0 = __p0; \ - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8_t) __builtin_neon_vdupb_lane_i8((int8x8_t)__rev0, __p1); \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vdupb_lane_i8(__builtin_bit_cast(int8x8_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -49338,22 +50362,22 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupd_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x1_t __s0 = __p0; \ - __ret = (float64_t) __builtin_neon_vdupd_lane_f64((float64x1_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vdupd_lane_f64(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vdups_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vdups_lane_f32((float32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vdups_lane_f32(__s0, __p1)); \ __ret; \ }) #else #define vdups_lane_f32(__p0, __p1) __extension__ ({ \ float32_t __ret; \ float32x2_t __s0 = __p0; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float32_t) __builtin_neon_vdups_lane_f32((float32x2_t)__rev0, __p1); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vdups_lane_f32(__rev0, __p1)); \ __ret; \ }) #endif @@ -49362,15 +50386,15 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdups_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vdups_lane_i32(__builtin_bit_cast(int32x2_t, __s0), __p1)); \ __ret; \ }) #else #define vdups_lane_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32x2_t __s0 = __p0; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int32_t) __builtin_neon_vdups_lane_i32((int32x2_t)__rev0, __p1); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vdups_lane_i32(__builtin_bit_cast(int32x2_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -49378,22 +50402,39 @@ __ai 
__attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vdupd_lane_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64x1_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vdupd_lane_i64((int64x1_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vdupd_lane_i64(__builtin_bit_cast(int64x1_t, __s0), __p1)); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vdupb_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vdupb_lane_mf8(__s0, __p1)); \ + __ret; \ +}) +#else +#define vdupb_lane_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x8_t __s0 = __p0; \ + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vdupb_lane_mf8(__rev0, __p1)); \ + __ret; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ #define vduph_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __s0), __p1)); \ __ret; \ }) #else #define vduph_lane_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16x4_t __s0 = __p0; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int16_t) __builtin_neon_vduph_lane_i16((int16x4_t)__rev0, __p1); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vduph_lane_i16(__builtin_bit_cast(int16x4_t, __rev0), __p1)); \ __ret; \ }) #endif @@ -49402,786 +50443,839 @@ __ai __attribute__((target("neon"))) float32x2_t vdiv_f32(float32x2_t __p0, floa #define vduph_lane_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vduph_lane_f16((float16x4_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vduph_lane_f16(__s0, __p1)); \ __ret; \ }) #else #define vduph_lane_f16(__p0, __p1) __extension__ ({ \ float16_t __ret; \ float16x4_t __s0 = __p0; \ - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vduph_lane_f16((float16x4_t)__rev0, __p1); \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vduph_lane_f16(__rev0, __p1)); \ __ret; \ }) #endif -#define vdup_lane_p64(__p0_364, __p1_364) __extension__ ({ \ - poly64x1_t __ret_364; \ - poly64x1_t __s0_364 = __p0_364; \ - __ret_364 = splat_lane_p64(__s0_364, __p1_364); \ - __ret_364; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_p64(__p0_365, __p1_365) __extension__ ({ \ - poly64x2_t __ret_365; \ - poly64x1_t __s0_365 = __p0_365; \ - __ret_365 = splatq_lane_p64(__s0_365, __p1_365); \ - __ret_365; \ -}) -#else -#define vdupq_lane_p64(__p0_366, __p1_366) __extension__ ({ \ - poly64x2_t __ret_366; \ - poly64x1_t __s0_366 = __p0_366; \ - __ret_366 = __noswap_splatq_lane_p64(__s0_366, __p1_366); \ - __ret_366 = __builtin_shufflevector(__ret_366, __ret_366, 1, 0); \ - __ret_366; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_f64(__p0_367, __p1_367) __extension__ ({ \ - float64x2_t __ret_367; \ - float64x1_t __s0_367 = __p0_367; \ - __ret_367 = splatq_lane_f64(__s0_367, __p1_367); \ - __ret_367; \ -}) -#else -#define 
vdupq_lane_f64(__p0_368, __p1_368) __extension__ ({ \ - float64x2_t __ret_368; \ - float64x1_t __s0_368 = __p0_368; \ - __ret_368 = __noswap_splatq_lane_f64(__s0_368, __p1_368); \ - __ret_368 = __builtin_shufflevector(__ret_368, __ret_368, 1, 0); \ - __ret_368; \ -}) -#endif - -#define vdup_lane_f64(__p0_369, __p1_369) __extension__ ({ \ - float64x1_t __ret_369; \ - float64x1_t __s0_369 = __p0_369; \ - __ret_369 = splat_lane_f64(__s0_369, __p1_369); \ - __ret_369; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ - poly8_t __ret; \ - poly8x16_t __s0 = __p0; \ - __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((poly8x16_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ - poly8_t __ret; \ - poly8x16_t __s0 = __p0; \ - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly8_t) __builtin_neon_vdupb_laneq_i8((poly8x16_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16_t __ret; \ - poly16x8_t __s0 = __p0; \ - __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((poly16x8_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ - poly16_t __ret; \ - poly16x8_t __s0 = __p0; \ - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (poly16_t) __builtin_neon_vduph_laneq_i16((poly16x8_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8_t __ret; \ - uint8x16_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ - uint8_t __ret; \ - uint8x16_t __s0 = __p0; \ - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32_t __ret; \ - uint32x4_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ - uint32_t __ret; \ - uint32x4_t __s0 = __p0; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (uint32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64_t __ret; \ - uint64x2_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ - uint64_t __ret; \ - uint64x2_t __s0 = __p0; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (uint64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16_t __ret; \ - uint16x8_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ - uint16_t __ret; \ - uint16x8_t __s0 = __p0; \ - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 
7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (uint16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ - int8_t __ret; \ - int8x16_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ - int8_t __ret; \ - int8x16_t __s0 = __p0; \ - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int8_t) __builtin_neon_vdupb_laneq_i8((int8x16_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ - float64_t __ret; \ - float64x2_t __s0 = __p0; \ - __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((float64x2_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ - float64_t __ret; \ - float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float64_t) __builtin_neon_vdupd_laneq_f64((float64x2_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ - float32_t __ret; \ - float32x4_t __s0 = __p0; \ - __ret = (float32_t) __builtin_neon_vdups_laneq_f32((float32x4_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ - float32_t __ret; \ - float32x4_t __s0 = __p0; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (float32_t) __builtin_neon_vdups_laneq_f32((float32x4_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ - int32_t __ret; \ - int32x4_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ - int32_t __ret; \ - int32x4_t __s0 = __p0; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - __ret = (int32_t) __builtin_neon_vdups_laneq_i32((int32x4_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ - int64_t __ret; \ - int64x2_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ - int64_t __ret; \ - int64x2_t __s0 = __p0; \ - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (int64_t) __builtin_neon_vdupd_laneq_i64((int64x2_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ - int16_t __ret; \ - int16x8_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ - int16_t __ret; \ - int16x8_t __s0 = __p0; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16_t) __builtin_neon_vduph_laneq_i16((int16x8_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ - float16_t __ret; \ - float16x8_t __s0 = __p0; \ - __ret = (float16_t) __builtin_neon_vduph_laneq_f16((float16x8_t)__s0, __p1); \ - __ret; \ -}) -#else -#define vduph_laneq_f16(__p0, __p1) 
__extension__ ({ \ - float16_t __ret; \ - float16x8_t __s0 = __p0; \ - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (float16_t) __builtin_neon_vduph_laneq_f16((float16x8_t)__rev0, __p1); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p8(__p0_370, __p1_370) __extension__ ({ \ - poly8x8_t __ret_370; \ - poly8x16_t __s0_370 = __p0_370; \ - __ret_370 = splat_laneq_p8(__s0_370, __p1_370); \ - __ret_370; \ -}) -#else -#define vdup_laneq_p8(__p0_371, __p1_371) __extension__ ({ \ - poly8x8_t __ret_371; \ - poly8x16_t __s0_371 = __p0_371; \ - poly8x16_t __rev0_371; __rev0_371 = __builtin_shufflevector(__s0_371, __s0_371, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_371 = __noswap_splat_laneq_p8(__rev0_371, __p1_371); \ - __ret_371 = __builtin_shufflevector(__ret_371, __ret_371, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_371; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p64(__p0_372, __p1_372) __extension__ ({ \ - poly64x1_t __ret_372; \ - poly64x2_t __s0_372 = __p0_372; \ - __ret_372 = splat_laneq_p64(__s0_372, __p1_372); \ - __ret_372; \ -}) -#else -#define vdup_laneq_p64(__p0_373, __p1_373) __extension__ ({ \ - poly64x1_t __ret_373; \ - poly64x2_t __s0_373 = __p0_373; \ - poly64x2_t __rev0_373; __rev0_373 = __builtin_shufflevector(__s0_373, __s0_373, 1, 0); \ - __ret_373 = __noswap_splat_laneq_p64(__rev0_373, __p1_373); \ - __ret_373; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p16(__p0_374, __p1_374) __extension__ ({ \ - poly16x4_t __ret_374; \ - poly16x8_t __s0_374 = __p0_374; \ - __ret_374 = splat_laneq_p16(__s0_374, __p1_374); \ - __ret_374; \ -}) -#else -#define vdup_laneq_p16(__p0_375, __p1_375) __extension__ ({ \ - poly16x4_t __ret_375; \ - poly16x8_t __s0_375 = __p0_375; \ - poly16x8_t __rev0_375; __rev0_375 = __builtin_shufflevector(__s0_375, __s0_375, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_375 = __noswap_splat_laneq_p16(__rev0_375, __p1_375); \ - __ret_375 = __builtin_shufflevector(__ret_375, __ret_375, 3, 2, 1, 0); \ - __ret_375; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p8(__p0_376, __p1_376) __extension__ ({ \ - poly8x16_t __ret_376; \ - poly8x16_t __s0_376 = __p0_376; \ - __ret_376 = splatq_laneq_p8(__s0_376, __p1_376); \ +#define vdup_lane_p64(__p0_376, __p1_376) __extension__ ({ \ + poly64x1_t __ret_376; \ + poly64x1_t __s0_376 = __p0_376; \ + __ret_376 = splat_lane_p64(__s0_376, __p1_376); \ __ret_376; \ }) -#else -#define vdupq_laneq_p8(__p0_377, __p1_377) __extension__ ({ \ - poly8x16_t __ret_377; \ - poly8x16_t __s0_377 = __p0_377; \ - poly8x16_t __rev0_377; __rev0_377 = __builtin_shufflevector(__s0_377, __s0_377, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_377 = __noswap_splatq_laneq_p8(__rev0_377, __p1_377); \ - __ret_377 = __builtin_shufflevector(__ret_377, __ret_377, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#ifdef __LITTLE_ENDIAN__ +#define vdupq_lane_p64(__p0_377, __p1_377) __extension__ ({ \ + poly64x2_t __ret_377; \ + poly64x1_t __s0_377 = __p0_377; \ + __ret_377 = splatq_lane_p64(__s0_377, __p1_377); \ __ret_377; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p64(__p0_378, __p1_378) __extension__ ({ \ +#else +#define vdupq_lane_p64(__p0_378, __p1_378) __extension__ ({ \ poly64x2_t __ret_378; \ - poly64x2_t __s0_378 = __p0_378; \ - __ret_378 = splatq_laneq_p64(__s0_378, __p1_378); \ + poly64x1_t __s0_378 = __p0_378; \ + __ret_378 = __noswap_splatq_lane_p64(__s0_378, 
__p1_378); \ + __ret_378 = __builtin_shufflevector(__ret_378, __ret_378, __lane_reverse_128_64); \ __ret_378; \ }) -#else -#define vdupq_laneq_p64(__p0_379, __p1_379) __extension__ ({ \ - poly64x2_t __ret_379; \ - poly64x2_t __s0_379 = __p0_379; \ - poly64x2_t __rev0_379; __rev0_379 = __builtin_shufflevector(__s0_379, __s0_379, 1, 0); \ - __ret_379 = __noswap_splatq_laneq_p64(__rev0_379, __p1_379); \ - __ret_379 = __builtin_shufflevector(__ret_379, __ret_379, 1, 0); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdupq_lane_f64(__p0_379, __p1_379) __extension__ ({ \ + float64x2_t __ret_379; \ + float64x1_t __s0_379 = __p0_379; \ + __ret_379 = splatq_lane_f64(__s0_379, __p1_379); \ __ret_379; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p16(__p0_380, __p1_380) __extension__ ({ \ - poly16x8_t __ret_380; \ - poly16x8_t __s0_380 = __p0_380; \ - __ret_380 = splatq_laneq_p16(__s0_380, __p1_380); \ +#else +#define vdupq_lane_f64(__p0_380, __p1_380) __extension__ ({ \ + float64x2_t __ret_380; \ + float64x1_t __s0_380 = __p0_380; \ + __ret_380 = __noswap_splatq_lane_f64(__s0_380, __p1_380); \ + __ret_380 = __builtin_shufflevector(__ret_380, __ret_380, __lane_reverse_128_64); \ __ret_380; \ }) -#else -#define vdupq_laneq_p16(__p0_381, __p1_381) __extension__ ({ \ - poly16x8_t __ret_381; \ - poly16x8_t __s0_381 = __p0_381; \ - poly16x8_t __rev0_381; __rev0_381 = __builtin_shufflevector(__s0_381, __s0_381, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_381 = __noswap_splatq_laneq_p16(__rev0_381, __p1_381); \ - __ret_381 = __builtin_shufflevector(__ret_381, __ret_381, 7, 6, 5, 4, 3, 2, 1, 0); \ +#endif + +#define vdup_lane_f64(__p0_381, __p1_381) __extension__ ({ \ + float64x1_t __ret_381; \ + float64x1_t __s0_381 = __p0_381; \ + __ret_381 = splat_lane_f64(__s0_381, __p1_381); \ __ret_381; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8_t __ret; \ + poly8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__s0, __p1)); \ + __ret; \ +}) +#else +#define vdupb_laneq_p8(__p0, __p1) __extension__ ({ \ + poly8_t __ret; \ + poly8x16_t __s0 = __p0; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(poly8_t, __builtin_neon_vdupb_laneq_i8(__rev0, __p1)); \ + __ret; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u8(__p0_382, __p1_382) __extension__ ({ \ - uint8x16_t __ret_382; \ - uint8x16_t __s0_382 = __p0_382; \ - __ret_382 = splatq_laneq_u8(__s0_382, __p1_382); \ +#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16_t __ret; \ + poly16x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__s0, __p1)); \ + __ret; \ +}) +#else +#define vduph_laneq_p16(__p0, __p1) __extension__ ({ \ + poly16_t __ret; \ + poly16x8_t __s0 = __p0; \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(poly16_t, __builtin_neon_vduph_laneq_i16(__rev0, __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8_t __ret; \ + uint8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vdupb_laneq_u8(__p0, __p1) __extension__ ({ \ + uint8_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = 
__builtin_bit_cast(uint8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32_t __ret; \ + uint32x4_t __s0 = __p0; \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vdups_laneq_i32(__builtin_bit_cast(int32x4_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vdups_laneq_u32(__p0, __p1) __extension__ ({ \ + uint32_t __ret; \ + uint32x4_t __s0 = __p0; \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vdups_laneq_i32(__builtin_bit_cast(int32x4_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64_t __ret; \ + uint64x2_t __s0 = __p0; \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vdupd_laneq_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vdupd_laneq_u64(__p0, __p1) __extension__ ({ \ + uint64_t __ret; \ + uint64x2_t __s0 = __p0; \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vdupd_laneq_i64(__builtin_bit_cast(int64x2_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16_t __ret; \ + uint16x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vduph_laneq_u16(__p0, __p1) __extension__ ({ \ + uint16_t __ret; \ + uint16x8_t __s0 = __p0; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ + int8_t __ret; \ + int8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vdupb_laneq_s8(__p0, __p1) __extension__ ({ \ + int8_t __ret; \ + int8x16_t __s0 = __p0; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vdupb_laneq_i8(__builtin_bit_cast(int8x16_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ + float64_t __ret; \ + float64x2_t __s0 = __p0; \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vdupd_laneq_f64(__s0, __p1)); \ + __ret; \ +}) +#else +#define vdupd_laneq_f64(__p0, __p1) __extension__ ({ \ + float64_t __ret; \ + float64x2_t __s0 = __p0; \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vdupd_laneq_f64(__rev0, __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ + float32_t __ret; \ + float32x4_t __s0 = __p0; \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vdups_laneq_f32(__s0, __p1)); \ + __ret; \ +}) +#else +#define vdups_laneq_f32(__p0, __p1) __extension__ ({ \ + float32_t __ret; \ + float32x4_t __s0 = __p0; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret 
= __builtin_bit_cast(float32_t, __builtin_neon_vdups_laneq_f32(__rev0, __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ + int32_t __ret; \ + int32x4_t __s0 = __p0; \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vdups_laneq_i32(__builtin_bit_cast(int32x4_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vdups_laneq_s32(__p0, __p1) __extension__ ({ \ + int32_t __ret; \ + int32x4_t __s0 = __p0; \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vdups_laneq_i32(__builtin_bit_cast(int32x4_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ + int64_t __ret; \ + int64x2_t __s0 = __p0; \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vdupd_laneq_i64(__builtin_bit_cast(int64x2_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vdupd_laneq_s64(__p0, __p1) __extension__ ({ \ + int64_t __ret; \ + int64x2_t __s0 = __p0; \ + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vdupd_laneq_i64(__builtin_bit_cast(int64x2_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdupb_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vdupb_laneq_mf8(__s0, __p1)); \ + __ret; \ +}) +#else +#define vdupb_laneq_mf8(__p0, __p1) __extension__ ({ \ + mfloat8_t __ret; \ + mfloat8x16_t __s0 = __p0; \ + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8_t, __builtin_neon_vdupb_laneq_mf8(__rev0, __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ + int16_t __ret; \ + int16x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __s0), __p1)); \ + __ret; \ +}) +#else +#define vduph_laneq_s16(__p0, __p1) __extension__ ({ \ + int16_t __ret; \ + int16x8_t __s0 = __p0; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vduph_laneq_i16(__builtin_bit_cast(int16x8_t, __rev0), __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ + float16_t __ret; \ + float16x8_t __s0 = __p0; \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vduph_laneq_f16(__s0, __p1)); \ + __ret; \ +}) +#else +#define vduph_laneq_f16(__p0, __p1) __extension__ ({ \ + float16_t __ret; \ + float16x8_t __s0 = __p0; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(float16_t, __builtin_neon_vduph_laneq_f16(__rev0, __p1)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_p8(__p0_382, __p1_382) __extension__ ({ \ + poly8x8_t __ret_382; \ + poly8x16_t __s0_382 = __p0_382; \ + __ret_382 = splat_laneq_p8(__s0_382, __p1_382); \ __ret_382; \ }) #else -#define vdupq_laneq_u8(__p0_383, __p1_383) __extension__ ({ \ - uint8x16_t __ret_383; \ - uint8x16_t __s0_383 = __p0_383; \ - uint8x16_t __rev0_383; __rev0_383 = __builtin_shufflevector(__s0_383, __s0_383, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_383 = 
__noswap_splatq_laneq_u8(__rev0_383, __p1_383); \ - __ret_383 = __builtin_shufflevector(__ret_383, __ret_383, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdup_laneq_p8(__p0_383, __p1_383) __extension__ ({ \ + poly8x8_t __ret_383; \ + poly8x16_t __s0_383 = __p0_383; \ + poly8x16_t __rev0_383; __rev0_383 = __builtin_shufflevector(__s0_383, __s0_383, __lane_reverse_128_8); \ + __ret_383 = __noswap_splat_laneq_p8(__rev0_383, __p1_383); \ + __ret_383 = __builtin_shufflevector(__ret_383, __ret_383, __lane_reverse_64_8); \ __ret_383; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u32(__p0_384, __p1_384) __extension__ ({ \ - uint32x4_t __ret_384; \ - uint32x4_t __s0_384 = __p0_384; \ - __ret_384 = splatq_laneq_u32(__s0_384, __p1_384); \ +#define vdup_laneq_p64(__p0_384, __p1_384) __extension__ ({ \ + poly64x1_t __ret_384; \ + poly64x2_t __s0_384 = __p0_384; \ + __ret_384 = splat_laneq_p64(__s0_384, __p1_384); \ __ret_384; \ }) #else -#define vdupq_laneq_u32(__p0_385, __p1_385) __extension__ ({ \ - uint32x4_t __ret_385; \ - uint32x4_t __s0_385 = __p0_385; \ - uint32x4_t __rev0_385; __rev0_385 = __builtin_shufflevector(__s0_385, __s0_385, 3, 2, 1, 0); \ - __ret_385 = __noswap_splatq_laneq_u32(__rev0_385, __p1_385); \ - __ret_385 = __builtin_shufflevector(__ret_385, __ret_385, 3, 2, 1, 0); \ +#define vdup_laneq_p64(__p0_385, __p1_385) __extension__ ({ \ + poly64x1_t __ret_385; \ + poly64x2_t __s0_385 = __p0_385; \ + poly64x2_t __rev0_385; __rev0_385 = __builtin_shufflevector(__s0_385, __s0_385, __lane_reverse_128_64); \ + __ret_385 = __noswap_splat_laneq_p64(__rev0_385, __p1_385); \ __ret_385; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u64(__p0_386, __p1_386) __extension__ ({ \ - uint64x2_t __ret_386; \ - uint64x2_t __s0_386 = __p0_386; \ - __ret_386 = splatq_laneq_u64(__s0_386, __p1_386); \ +#define vdup_laneq_p16(__p0_386, __p1_386) __extension__ ({ \ + poly16x4_t __ret_386; \ + poly16x8_t __s0_386 = __p0_386; \ + __ret_386 = splat_laneq_p16(__s0_386, __p1_386); \ __ret_386; \ }) #else -#define vdupq_laneq_u64(__p0_387, __p1_387) __extension__ ({ \ - uint64x2_t __ret_387; \ - uint64x2_t __s0_387 = __p0_387; \ - uint64x2_t __rev0_387; __rev0_387 = __builtin_shufflevector(__s0_387, __s0_387, 1, 0); \ - __ret_387 = __noswap_splatq_laneq_u64(__rev0_387, __p1_387); \ - __ret_387 = __builtin_shufflevector(__ret_387, __ret_387, 1, 0); \ +#define vdup_laneq_p16(__p0_387, __p1_387) __extension__ ({ \ + poly16x4_t __ret_387; \ + poly16x8_t __s0_387 = __p0_387; \ + poly16x8_t __rev0_387; __rev0_387 = __builtin_shufflevector(__s0_387, __s0_387, __lane_reverse_128_16); \ + __ret_387 = __noswap_splat_laneq_p16(__rev0_387, __p1_387); \ + __ret_387 = __builtin_shufflevector(__ret_387, __ret_387, __lane_reverse_64_16); \ __ret_387; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u16(__p0_388, __p1_388) __extension__ ({ \ - uint16x8_t __ret_388; \ - uint16x8_t __s0_388 = __p0_388; \ - __ret_388 = splatq_laneq_u16(__s0_388, __p1_388); \ +#define vdupq_laneq_p8(__p0_388, __p1_388) __extension__ ({ \ + poly8x16_t __ret_388; \ + poly8x16_t __s0_388 = __p0_388; \ + __ret_388 = splatq_laneq_p8(__s0_388, __p1_388); \ __ret_388; \ }) #else -#define vdupq_laneq_u16(__p0_389, __p1_389) __extension__ ({ \ - uint16x8_t __ret_389; \ - uint16x8_t __s0_389 = __p0_389; \ - uint16x8_t __rev0_389; __rev0_389 = __builtin_shufflevector(__s0_389, __s0_389, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_389 = __noswap_splatq_laneq_u16(__rev0_389, __p1_389); \ - __ret_389 = 
__builtin_shufflevector(__ret_389, __ret_389, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_p8(__p0_389, __p1_389) __extension__ ({ \ + poly8x16_t __ret_389; \ + poly8x16_t __s0_389 = __p0_389; \ + poly8x16_t __rev0_389; __rev0_389 = __builtin_shufflevector(__s0_389, __s0_389, __lane_reverse_128_8); \ + __ret_389 = __noswap_splatq_laneq_p8(__rev0_389, __p1_389); \ + __ret_389 = __builtin_shufflevector(__ret_389, __ret_389, __lane_reverse_128_8); \ __ret_389; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s8(__p0_390, __p1_390) __extension__ ({ \ - int8x16_t __ret_390; \ - int8x16_t __s0_390 = __p0_390; \ - __ret_390 = splatq_laneq_s8(__s0_390, __p1_390); \ +#define vdupq_laneq_p64(__p0_390, __p1_390) __extension__ ({ \ + poly64x2_t __ret_390; \ + poly64x2_t __s0_390 = __p0_390; \ + __ret_390 = splatq_laneq_p64(__s0_390, __p1_390); \ __ret_390; \ }) #else -#define vdupq_laneq_s8(__p0_391, __p1_391) __extension__ ({ \ - int8x16_t __ret_391; \ - int8x16_t __s0_391 = __p0_391; \ - int8x16_t __rev0_391; __rev0_391 = __builtin_shufflevector(__s0_391, __s0_391, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_391 = __noswap_splatq_laneq_s8(__rev0_391, __p1_391); \ - __ret_391 = __builtin_shufflevector(__ret_391, __ret_391, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_p64(__p0_391, __p1_391) __extension__ ({ \ + poly64x2_t __ret_391; \ + poly64x2_t __s0_391 = __p0_391; \ + poly64x2_t __rev0_391; __rev0_391 = __builtin_shufflevector(__s0_391, __s0_391, __lane_reverse_128_64); \ + __ret_391 = __noswap_splatq_laneq_p64(__rev0_391, __p1_391); \ + __ret_391 = __builtin_shufflevector(__ret_391, __ret_391, __lane_reverse_128_64); \ __ret_391; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f64(__p0_392, __p1_392) __extension__ ({ \ - float64x2_t __ret_392; \ - float64x2_t __s0_392 = __p0_392; \ - __ret_392 = splatq_laneq_f64(__s0_392, __p1_392); \ +#define vdupq_laneq_p16(__p0_392, __p1_392) __extension__ ({ \ + poly16x8_t __ret_392; \ + poly16x8_t __s0_392 = __p0_392; \ + __ret_392 = splatq_laneq_p16(__s0_392, __p1_392); \ __ret_392; \ }) #else -#define vdupq_laneq_f64(__p0_393, __p1_393) __extension__ ({ \ - float64x2_t __ret_393; \ - float64x2_t __s0_393 = __p0_393; \ - float64x2_t __rev0_393; __rev0_393 = __builtin_shufflevector(__s0_393, __s0_393, 1, 0); \ - __ret_393 = __noswap_splatq_laneq_f64(__rev0_393, __p1_393); \ - __ret_393 = __builtin_shufflevector(__ret_393, __ret_393, 1, 0); \ +#define vdupq_laneq_p16(__p0_393, __p1_393) __extension__ ({ \ + poly16x8_t __ret_393; \ + poly16x8_t __s0_393 = __p0_393; \ + poly16x8_t __rev0_393; __rev0_393 = __builtin_shufflevector(__s0_393, __s0_393, __lane_reverse_128_16); \ + __ret_393 = __noswap_splatq_laneq_p16(__rev0_393, __p1_393); \ + __ret_393 = __builtin_shufflevector(__ret_393, __ret_393, __lane_reverse_128_16); \ __ret_393; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f32(__p0_394, __p1_394) __extension__ ({ \ - float32x4_t __ret_394; \ - float32x4_t __s0_394 = __p0_394; \ - __ret_394 = splatq_laneq_f32(__s0_394, __p1_394); \ +#define vdupq_laneq_u8(__p0_394, __p1_394) __extension__ ({ \ + uint8x16_t __ret_394; \ + uint8x16_t __s0_394 = __p0_394; \ + __ret_394 = splatq_laneq_u8(__s0_394, __p1_394); \ __ret_394; \ }) #else -#define vdupq_laneq_f32(__p0_395, __p1_395) __extension__ ({ \ - float32x4_t __ret_395; \ - float32x4_t __s0_395 = __p0_395; \ - float32x4_t __rev0_395; __rev0_395 = __builtin_shufflevector(__s0_395, __s0_395, 3, 2, 1, 0); \ - __ret_395 = 
__noswap_splatq_laneq_f32(__rev0_395, __p1_395); \ - __ret_395 = __builtin_shufflevector(__ret_395, __ret_395, 3, 2, 1, 0); \ +#define vdupq_laneq_u8(__p0_395, __p1_395) __extension__ ({ \ + uint8x16_t __ret_395; \ + uint8x16_t __s0_395 = __p0_395; \ + uint8x16_t __rev0_395; __rev0_395 = __builtin_shufflevector(__s0_395, __s0_395, __lane_reverse_128_8); \ + __ret_395 = __noswap_splatq_laneq_u8(__rev0_395, __p1_395); \ + __ret_395 = __builtin_shufflevector(__ret_395, __ret_395, __lane_reverse_128_8); \ __ret_395; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f16(__p0_396, __p1_396) __extension__ ({ \ - float16x8_t __ret_396; \ - float16x8_t __s0_396 = __p0_396; \ - __ret_396 = splatq_laneq_f16(__s0_396, __p1_396); \ +#define vdupq_laneq_u32(__p0_396, __p1_396) __extension__ ({ \ + uint32x4_t __ret_396; \ + uint32x4_t __s0_396 = __p0_396; \ + __ret_396 = splatq_laneq_u32(__s0_396, __p1_396); \ __ret_396; \ }) #else -#define vdupq_laneq_f16(__p0_397, __p1_397) __extension__ ({ \ - float16x8_t __ret_397; \ - float16x8_t __s0_397 = __p0_397; \ - float16x8_t __rev0_397; __rev0_397 = __builtin_shufflevector(__s0_397, __s0_397, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_397 = __noswap_splatq_laneq_f16(__rev0_397, __p1_397); \ - __ret_397 = __builtin_shufflevector(__ret_397, __ret_397, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_u32(__p0_397, __p1_397) __extension__ ({ \ + uint32x4_t __ret_397; \ + uint32x4_t __s0_397 = __p0_397; \ + uint32x4_t __rev0_397; __rev0_397 = __builtin_shufflevector(__s0_397, __s0_397, __lane_reverse_128_32); \ + __ret_397 = __noswap_splatq_laneq_u32(__rev0_397, __p1_397); \ + __ret_397 = __builtin_shufflevector(__ret_397, __ret_397, __lane_reverse_128_32); \ __ret_397; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s32(__p0_398, __p1_398) __extension__ ({ \ - int32x4_t __ret_398; \ - int32x4_t __s0_398 = __p0_398; \ - __ret_398 = splatq_laneq_s32(__s0_398, __p1_398); \ +#define vdupq_laneq_u64(__p0_398, __p1_398) __extension__ ({ \ + uint64x2_t __ret_398; \ + uint64x2_t __s0_398 = __p0_398; \ + __ret_398 = splatq_laneq_u64(__s0_398, __p1_398); \ __ret_398; \ }) #else -#define vdupq_laneq_s32(__p0_399, __p1_399) __extension__ ({ \ - int32x4_t __ret_399; \ - int32x4_t __s0_399 = __p0_399; \ - int32x4_t __rev0_399; __rev0_399 = __builtin_shufflevector(__s0_399, __s0_399, 3, 2, 1, 0); \ - __ret_399 = __noswap_splatq_laneq_s32(__rev0_399, __p1_399); \ - __ret_399 = __builtin_shufflevector(__ret_399, __ret_399, 3, 2, 1, 0); \ +#define vdupq_laneq_u64(__p0_399, __p1_399) __extension__ ({ \ + uint64x2_t __ret_399; \ + uint64x2_t __s0_399 = __p0_399; \ + uint64x2_t __rev0_399; __rev0_399 = __builtin_shufflevector(__s0_399, __s0_399, __lane_reverse_128_64); \ + __ret_399 = __noswap_splatq_laneq_u64(__rev0_399, __p1_399); \ + __ret_399 = __builtin_shufflevector(__ret_399, __ret_399, __lane_reverse_128_64); \ __ret_399; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s64(__p0_400, __p1_400) __extension__ ({ \ - int64x2_t __ret_400; \ - int64x2_t __s0_400 = __p0_400; \ - __ret_400 = splatq_laneq_s64(__s0_400, __p1_400); \ +#define vdupq_laneq_u16(__p0_400, __p1_400) __extension__ ({ \ + uint16x8_t __ret_400; \ + uint16x8_t __s0_400 = __p0_400; \ + __ret_400 = splatq_laneq_u16(__s0_400, __p1_400); \ __ret_400; \ }) #else -#define vdupq_laneq_s64(__p0_401, __p1_401) __extension__ ({ \ - int64x2_t __ret_401; \ - int64x2_t __s0_401 = __p0_401; \ - int64x2_t __rev0_401; __rev0_401 = __builtin_shufflevector(__s0_401, __s0_401, 1, 0); \ - __ret_401 = 
__noswap_splatq_laneq_s64(__rev0_401, __p1_401); \ - __ret_401 = __builtin_shufflevector(__ret_401, __ret_401, 1, 0); \ +#define vdupq_laneq_u16(__p0_401, __p1_401) __extension__ ({ \ + uint16x8_t __ret_401; \ + uint16x8_t __s0_401 = __p0_401; \ + uint16x8_t __rev0_401; __rev0_401 = __builtin_shufflevector(__s0_401, __s0_401, __lane_reverse_128_16); \ + __ret_401 = __noswap_splatq_laneq_u16(__rev0_401, __p1_401); \ + __ret_401 = __builtin_shufflevector(__ret_401, __ret_401, __lane_reverse_128_16); \ __ret_401; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s16(__p0_402, __p1_402) __extension__ ({ \ - int16x8_t __ret_402; \ - int16x8_t __s0_402 = __p0_402; \ - __ret_402 = splatq_laneq_s16(__s0_402, __p1_402); \ +#define vdupq_laneq_s8(__p0_402, __p1_402) __extension__ ({ \ + int8x16_t __ret_402; \ + int8x16_t __s0_402 = __p0_402; \ + __ret_402 = splatq_laneq_s8(__s0_402, __p1_402); \ __ret_402; \ }) #else -#define vdupq_laneq_s16(__p0_403, __p1_403) __extension__ ({ \ - int16x8_t __ret_403; \ - int16x8_t __s0_403 = __p0_403; \ - int16x8_t __rev0_403; __rev0_403 = __builtin_shufflevector(__s0_403, __s0_403, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_403 = __noswap_splatq_laneq_s16(__rev0_403, __p1_403); \ - __ret_403 = __builtin_shufflevector(__ret_403, __ret_403, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_s8(__p0_403, __p1_403) __extension__ ({ \ + int8x16_t __ret_403; \ + int8x16_t __s0_403 = __p0_403; \ + int8x16_t __rev0_403; __rev0_403 = __builtin_shufflevector(__s0_403, __s0_403, __lane_reverse_128_8); \ + __ret_403 = __noswap_splatq_laneq_s8(__rev0_403, __p1_403); \ + __ret_403 = __builtin_shufflevector(__ret_403, __ret_403, __lane_reverse_128_8); \ __ret_403; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u8(__p0_404, __p1_404) __extension__ ({ \ - uint8x8_t __ret_404; \ - uint8x16_t __s0_404 = __p0_404; \ - __ret_404 = splat_laneq_u8(__s0_404, __p1_404); \ +#define vdupq_laneq_f64(__p0_404, __p1_404) __extension__ ({ \ + float64x2_t __ret_404; \ + float64x2_t __s0_404 = __p0_404; \ + __ret_404 = splatq_laneq_f64(__s0_404, __p1_404); \ __ret_404; \ }) #else -#define vdup_laneq_u8(__p0_405, __p1_405) __extension__ ({ \ - uint8x8_t __ret_405; \ - uint8x16_t __s0_405 = __p0_405; \ - uint8x16_t __rev0_405; __rev0_405 = __builtin_shufflevector(__s0_405, __s0_405, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_405 = __noswap_splat_laneq_u8(__rev0_405, __p1_405); \ - __ret_405 = __builtin_shufflevector(__ret_405, __ret_405, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_f64(__p0_405, __p1_405) __extension__ ({ \ + float64x2_t __ret_405; \ + float64x2_t __s0_405 = __p0_405; \ + float64x2_t __rev0_405; __rev0_405 = __builtin_shufflevector(__s0_405, __s0_405, __lane_reverse_128_64); \ + __ret_405 = __noswap_splatq_laneq_f64(__rev0_405, __p1_405); \ + __ret_405 = __builtin_shufflevector(__ret_405, __ret_405, __lane_reverse_128_64); \ __ret_405; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u32(__p0_406, __p1_406) __extension__ ({ \ - uint32x2_t __ret_406; \ - uint32x4_t __s0_406 = __p0_406; \ - __ret_406 = splat_laneq_u32(__s0_406, __p1_406); \ +#define vdupq_laneq_f32(__p0_406, __p1_406) __extension__ ({ \ + float32x4_t __ret_406; \ + float32x4_t __s0_406 = __p0_406; \ + __ret_406 = splatq_laneq_f32(__s0_406, __p1_406); \ __ret_406; \ }) #else -#define vdup_laneq_u32(__p0_407, __p1_407) __extension__ ({ \ - uint32x2_t __ret_407; \ - uint32x4_t __s0_407 = __p0_407; \ - uint32x4_t __rev0_407; __rev0_407 = __builtin_shufflevector(__s0_407, __s0_407, 
3, 2, 1, 0); \ - __ret_407 = __noswap_splat_laneq_u32(__rev0_407, __p1_407); \ - __ret_407 = __builtin_shufflevector(__ret_407, __ret_407, 1, 0); \ +#define vdupq_laneq_f32(__p0_407, __p1_407) __extension__ ({ \ + float32x4_t __ret_407; \ + float32x4_t __s0_407 = __p0_407; \ + float32x4_t __rev0_407; __rev0_407 = __builtin_shufflevector(__s0_407, __s0_407, __lane_reverse_128_32); \ + __ret_407 = __noswap_splatq_laneq_f32(__rev0_407, __p1_407); \ + __ret_407 = __builtin_shufflevector(__ret_407, __ret_407, __lane_reverse_128_32); \ __ret_407; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u64(__p0_408, __p1_408) __extension__ ({ \ - uint64x1_t __ret_408; \ - uint64x2_t __s0_408 = __p0_408; \ - __ret_408 = splat_laneq_u64(__s0_408, __p1_408); \ +#define vdupq_laneq_f16(__p0_408, __p1_408) __extension__ ({ \ + float16x8_t __ret_408; \ + float16x8_t __s0_408 = __p0_408; \ + __ret_408 = splatq_laneq_f16(__s0_408, __p1_408); \ __ret_408; \ }) #else -#define vdup_laneq_u64(__p0_409, __p1_409) __extension__ ({ \ - uint64x1_t __ret_409; \ - uint64x2_t __s0_409 = __p0_409; \ - uint64x2_t __rev0_409; __rev0_409 = __builtin_shufflevector(__s0_409, __s0_409, 1, 0); \ - __ret_409 = __noswap_splat_laneq_u64(__rev0_409, __p1_409); \ +#define vdupq_laneq_f16(__p0_409, __p1_409) __extension__ ({ \ + float16x8_t __ret_409; \ + float16x8_t __s0_409 = __p0_409; \ + float16x8_t __rev0_409; __rev0_409 = __builtin_shufflevector(__s0_409, __s0_409, __lane_reverse_128_16); \ + __ret_409 = __noswap_splatq_laneq_f16(__rev0_409, __p1_409); \ + __ret_409 = __builtin_shufflevector(__ret_409, __ret_409, __lane_reverse_128_16); \ __ret_409; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u16(__p0_410, __p1_410) __extension__ ({ \ - uint16x4_t __ret_410; \ - uint16x8_t __s0_410 = __p0_410; \ - __ret_410 = splat_laneq_u16(__s0_410, __p1_410); \ +#define vdupq_laneq_s32(__p0_410, __p1_410) __extension__ ({ \ + int32x4_t __ret_410; \ + int32x4_t __s0_410 = __p0_410; \ + __ret_410 = splatq_laneq_s32(__s0_410, __p1_410); \ __ret_410; \ }) #else -#define vdup_laneq_u16(__p0_411, __p1_411) __extension__ ({ \ - uint16x4_t __ret_411; \ - uint16x8_t __s0_411 = __p0_411; \ - uint16x8_t __rev0_411; __rev0_411 = __builtin_shufflevector(__s0_411, __s0_411, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_411 = __noswap_splat_laneq_u16(__rev0_411, __p1_411); \ - __ret_411 = __builtin_shufflevector(__ret_411, __ret_411, 3, 2, 1, 0); \ +#define vdupq_laneq_s32(__p0_411, __p1_411) __extension__ ({ \ + int32x4_t __ret_411; \ + int32x4_t __s0_411 = __p0_411; \ + int32x4_t __rev0_411; __rev0_411 = __builtin_shufflevector(__s0_411, __s0_411, __lane_reverse_128_32); \ + __ret_411 = __noswap_splatq_laneq_s32(__rev0_411, __p1_411); \ + __ret_411 = __builtin_shufflevector(__ret_411, __ret_411, __lane_reverse_128_32); \ __ret_411; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s8(__p0_412, __p1_412) __extension__ ({ \ - int8x8_t __ret_412; \ - int8x16_t __s0_412 = __p0_412; \ - __ret_412 = splat_laneq_s8(__s0_412, __p1_412); \ +#define vdupq_laneq_s64(__p0_412, __p1_412) __extension__ ({ \ + int64x2_t __ret_412; \ + int64x2_t __s0_412 = __p0_412; \ + __ret_412 = splatq_laneq_s64(__s0_412, __p1_412); \ __ret_412; \ }) #else -#define vdup_laneq_s8(__p0_413, __p1_413) __extension__ ({ \ - int8x8_t __ret_413; \ - int8x16_t __s0_413 = __p0_413; \ - int8x16_t __rev0_413; __rev0_413 = __builtin_shufflevector(__s0_413, __s0_413, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_413 = __noswap_splat_laneq_s8(__rev0_413, 
__p1_413); \ - __ret_413 = __builtin_shufflevector(__ret_413, __ret_413, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vdupq_laneq_s64(__p0_413, __p1_413) __extension__ ({ \ + int64x2_t __ret_413; \ + int64x2_t __s0_413 = __p0_413; \ + int64x2_t __rev0_413; __rev0_413 = __builtin_shufflevector(__s0_413, __s0_413, __lane_reverse_128_64); \ + __ret_413 = __noswap_splatq_laneq_s64(__rev0_413, __p1_413); \ + __ret_413 = __builtin_shufflevector(__ret_413, __ret_413, __lane_reverse_128_64); \ __ret_413; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f64(__p0_414, __p1_414) __extension__ ({ \ - float64x1_t __ret_414; \ - float64x2_t __s0_414 = __p0_414; \ - __ret_414 = splat_laneq_f64(__s0_414, __p1_414); \ +#define vdupq_laneq_mf8(__p0_414, __p1_414) __extension__ ({ \ + mfloat8x16_t __ret_414; \ + mfloat8x16_t __s0_414 = __p0_414; \ + __ret_414 = splatq_laneq_mf8(__s0_414, __p1_414); \ __ret_414; \ }) #else -#define vdup_laneq_f64(__p0_415, __p1_415) __extension__ ({ \ - float64x1_t __ret_415; \ - float64x2_t __s0_415 = __p0_415; \ - float64x2_t __rev0_415; __rev0_415 = __builtin_shufflevector(__s0_415, __s0_415, 1, 0); \ - __ret_415 = __noswap_splat_laneq_f64(__rev0_415, __p1_415); \ +#define vdupq_laneq_mf8(__p0_415, __p1_415) __extension__ ({ \ + mfloat8x16_t __ret_415; \ + mfloat8x16_t __s0_415 = __p0_415; \ + mfloat8x16_t __rev0_415; __rev0_415 = __builtin_shufflevector(__s0_415, __s0_415, __lane_reverse_128_8); \ + __ret_415 = __noswap_splatq_laneq_mf8(__rev0_415, __p1_415); \ + __ret_415 = __builtin_shufflevector(__ret_415, __ret_415, __lane_reverse_128_8); \ __ret_415; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f32(__p0_416, __p1_416) __extension__ ({ \ - float32x2_t __ret_416; \ - float32x4_t __s0_416 = __p0_416; \ - __ret_416 = splat_laneq_f32(__s0_416, __p1_416); \ +#define vdupq_laneq_s16(__p0_416, __p1_416) __extension__ ({ \ + int16x8_t __ret_416; \ + int16x8_t __s0_416 = __p0_416; \ + __ret_416 = splatq_laneq_s16(__s0_416, __p1_416); \ __ret_416; \ }) #else -#define vdup_laneq_f32(__p0_417, __p1_417) __extension__ ({ \ - float32x2_t __ret_417; \ - float32x4_t __s0_417 = __p0_417; \ - float32x4_t __rev0_417; __rev0_417 = __builtin_shufflevector(__s0_417, __s0_417, 3, 2, 1, 0); \ - __ret_417 = __noswap_splat_laneq_f32(__rev0_417, __p1_417); \ - __ret_417 = __builtin_shufflevector(__ret_417, __ret_417, 1, 0); \ +#define vdupq_laneq_s16(__p0_417, __p1_417) __extension__ ({ \ + int16x8_t __ret_417; \ + int16x8_t __s0_417 = __p0_417; \ + int16x8_t __rev0_417; __rev0_417 = __builtin_shufflevector(__s0_417, __s0_417, __lane_reverse_128_16); \ + __ret_417 = __noswap_splatq_laneq_s16(__rev0_417, __p1_417); \ + __ret_417 = __builtin_shufflevector(__ret_417, __ret_417, __lane_reverse_128_16); \ __ret_417; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f16(__p0_418, __p1_418) __extension__ ({ \ - float16x4_t __ret_418; \ - float16x8_t __s0_418 = __p0_418; \ - __ret_418 = splat_laneq_f16(__s0_418, __p1_418); \ +#define vdup_laneq_u8(__p0_418, __p1_418) __extension__ ({ \ + uint8x8_t __ret_418; \ + uint8x16_t __s0_418 = __p0_418; \ + __ret_418 = splat_laneq_u8(__s0_418, __p1_418); \ __ret_418; \ }) #else -#define vdup_laneq_f16(__p0_419, __p1_419) __extension__ ({ \ - float16x4_t __ret_419; \ - float16x8_t __s0_419 = __p0_419; \ - float16x8_t __rev0_419; __rev0_419 = __builtin_shufflevector(__s0_419, __s0_419, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_419 = __noswap_splat_laneq_f16(__rev0_419, __p1_419); \ - __ret_419 = __builtin_shufflevector(__ret_419, __ret_419, 3, 2, 1, 0); 
\ +#define vdup_laneq_u8(__p0_419, __p1_419) __extension__ ({ \ + uint8x8_t __ret_419; \ + uint8x16_t __s0_419 = __p0_419; \ + uint8x16_t __rev0_419; __rev0_419 = __builtin_shufflevector(__s0_419, __s0_419, __lane_reverse_128_8); \ + __ret_419 = __noswap_splat_laneq_u8(__rev0_419, __p1_419); \ + __ret_419 = __builtin_shufflevector(__ret_419, __ret_419, __lane_reverse_64_8); \ __ret_419; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s32(__p0_420, __p1_420) __extension__ ({ \ - int32x2_t __ret_420; \ - int32x4_t __s0_420 = __p0_420; \ - __ret_420 = splat_laneq_s32(__s0_420, __p1_420); \ +#define vdup_laneq_u32(__p0_420, __p1_420) __extension__ ({ \ + uint32x2_t __ret_420; \ + uint32x4_t __s0_420 = __p0_420; \ + __ret_420 = splat_laneq_u32(__s0_420, __p1_420); \ __ret_420; \ }) #else -#define vdup_laneq_s32(__p0_421, __p1_421) __extension__ ({ \ - int32x2_t __ret_421; \ - int32x4_t __s0_421 = __p0_421; \ - int32x4_t __rev0_421; __rev0_421 = __builtin_shufflevector(__s0_421, __s0_421, 3, 2, 1, 0); \ - __ret_421 = __noswap_splat_laneq_s32(__rev0_421, __p1_421); \ - __ret_421 = __builtin_shufflevector(__ret_421, __ret_421, 1, 0); \ +#define vdup_laneq_u32(__p0_421, __p1_421) __extension__ ({ \ + uint32x2_t __ret_421; \ + uint32x4_t __s0_421 = __p0_421; \ + uint32x4_t __rev0_421; __rev0_421 = __builtin_shufflevector(__s0_421, __s0_421, __lane_reverse_128_32); \ + __ret_421 = __noswap_splat_laneq_u32(__rev0_421, __p1_421); \ + __ret_421 = __builtin_shufflevector(__ret_421, __ret_421, __lane_reverse_64_32); \ __ret_421; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s64(__p0_422, __p1_422) __extension__ ({ \ - int64x1_t __ret_422; \ - int64x2_t __s0_422 = __p0_422; \ - __ret_422 = splat_laneq_s64(__s0_422, __p1_422); \ +#define vdup_laneq_u64(__p0_422, __p1_422) __extension__ ({ \ + uint64x1_t __ret_422; \ + uint64x2_t __s0_422 = __p0_422; \ + __ret_422 = splat_laneq_u64(__s0_422, __p1_422); \ __ret_422; \ }) #else -#define vdup_laneq_s64(__p0_423, __p1_423) __extension__ ({ \ - int64x1_t __ret_423; \ - int64x2_t __s0_423 = __p0_423; \ - int64x2_t __rev0_423; __rev0_423 = __builtin_shufflevector(__s0_423, __s0_423, 1, 0); \ - __ret_423 = __noswap_splat_laneq_s64(__rev0_423, __p1_423); \ +#define vdup_laneq_u64(__p0_423, __p1_423) __extension__ ({ \ + uint64x1_t __ret_423; \ + uint64x2_t __s0_423 = __p0_423; \ + uint64x2_t __rev0_423; __rev0_423 = __builtin_shufflevector(__s0_423, __s0_423, __lane_reverse_128_64); \ + __ret_423 = __noswap_splat_laneq_u64(__rev0_423, __p1_423); \ __ret_423; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s16(__p0_424, __p1_424) __extension__ ({ \ - int16x4_t __ret_424; \ - int16x8_t __s0_424 = __p0_424; \ - __ret_424 = splat_laneq_s16(__s0_424, __p1_424); \ +#define vdup_laneq_u16(__p0_424, __p1_424) __extension__ ({ \ + uint16x4_t __ret_424; \ + uint16x8_t __s0_424 = __p0_424; \ + __ret_424 = splat_laneq_u16(__s0_424, __p1_424); \ __ret_424; \ }) #else -#define vdup_laneq_s16(__p0_425, __p1_425) __extension__ ({ \ - int16x4_t __ret_425; \ - int16x8_t __s0_425 = __p0_425; \ - int16x8_t __rev0_425; __rev0_425 = __builtin_shufflevector(__s0_425, __s0_425, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_425 = __noswap_splat_laneq_s16(__rev0_425, __p1_425); \ - __ret_425 = __builtin_shufflevector(__ret_425, __ret_425, 3, 2, 1, 0); \ +#define vdup_laneq_u16(__p0_425, __p1_425) __extension__ ({ \ + uint16x4_t __ret_425; \ + uint16x8_t __s0_425 = __p0_425; \ + uint16x8_t __rev0_425; __rev0_425 = __builtin_shufflevector(__s0_425, __s0_425, 
__lane_reverse_128_16); \ + __ret_425 = __noswap_splat_laneq_u16(__rev0_425, __p1_425); \ + __ret_425 = __builtin_shufflevector(__ret_425, __ret_425, __lane_reverse_64_16); \ __ret_425; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_s8(__p0_426, __p1_426) __extension__ ({ \ + int8x8_t __ret_426; \ + int8x16_t __s0_426 = __p0_426; \ + __ret_426 = splat_laneq_s8(__s0_426, __p1_426); \ + __ret_426; \ +}) +#else +#define vdup_laneq_s8(__p0_427, __p1_427) __extension__ ({ \ + int8x8_t __ret_427; \ + int8x16_t __s0_427 = __p0_427; \ + int8x16_t __rev0_427; __rev0_427 = __builtin_shufflevector(__s0_427, __s0_427, __lane_reverse_128_8); \ + __ret_427 = __noswap_splat_laneq_s8(__rev0_427, __p1_427); \ + __ret_427 = __builtin_shufflevector(__ret_427, __ret_427, __lane_reverse_64_8); \ + __ret_427; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_f64(__p0_428, __p1_428) __extension__ ({ \ + float64x1_t __ret_428; \ + float64x2_t __s0_428 = __p0_428; \ + __ret_428 = splat_laneq_f64(__s0_428, __p1_428); \ + __ret_428; \ +}) +#else +#define vdup_laneq_f64(__p0_429, __p1_429) __extension__ ({ \ + float64x1_t __ret_429; \ + float64x2_t __s0_429 = __p0_429; \ + float64x2_t __rev0_429; __rev0_429 = __builtin_shufflevector(__s0_429, __s0_429, __lane_reverse_128_64); \ + __ret_429 = __noswap_splat_laneq_f64(__rev0_429, __p1_429); \ + __ret_429; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_f32(__p0_430, __p1_430) __extension__ ({ \ + float32x2_t __ret_430; \ + float32x4_t __s0_430 = __p0_430; \ + __ret_430 = splat_laneq_f32(__s0_430, __p1_430); \ + __ret_430; \ +}) +#else +#define vdup_laneq_f32(__p0_431, __p1_431) __extension__ ({ \ + float32x2_t __ret_431; \ + float32x4_t __s0_431 = __p0_431; \ + float32x4_t __rev0_431; __rev0_431 = __builtin_shufflevector(__s0_431, __s0_431, __lane_reverse_128_32); \ + __ret_431 = __noswap_splat_laneq_f32(__rev0_431, __p1_431); \ + __ret_431 = __builtin_shufflevector(__ret_431, __ret_431, __lane_reverse_64_32); \ + __ret_431; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_f16(__p0_432, __p1_432) __extension__ ({ \ + float16x4_t __ret_432; \ + float16x8_t __s0_432 = __p0_432; \ + __ret_432 = splat_laneq_f16(__s0_432, __p1_432); \ + __ret_432; \ +}) +#else +#define vdup_laneq_f16(__p0_433, __p1_433) __extension__ ({ \ + float16x4_t __ret_433; \ + float16x8_t __s0_433 = __p0_433; \ + float16x8_t __rev0_433; __rev0_433 = __builtin_shufflevector(__s0_433, __s0_433, __lane_reverse_128_16); \ + __ret_433 = __noswap_splat_laneq_f16(__rev0_433, __p1_433); \ + __ret_433 = __builtin_shufflevector(__ret_433, __ret_433, __lane_reverse_64_16); \ + __ret_433; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_s32(__p0_434, __p1_434) __extension__ ({ \ + int32x2_t __ret_434; \ + int32x4_t __s0_434 = __p0_434; \ + __ret_434 = splat_laneq_s32(__s0_434, __p1_434); \ + __ret_434; \ +}) +#else +#define vdup_laneq_s32(__p0_435, __p1_435) __extension__ ({ \ + int32x2_t __ret_435; \ + int32x4_t __s0_435 = __p0_435; \ + int32x4_t __rev0_435; __rev0_435 = __builtin_shufflevector(__s0_435, __s0_435, __lane_reverse_128_32); \ + __ret_435 = __noswap_splat_laneq_s32(__rev0_435, __p1_435); \ + __ret_435 = __builtin_shufflevector(__ret_435, __ret_435, __lane_reverse_64_32); \ + __ret_435; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_s64(__p0_436, __p1_436) __extension__ ({ \ + int64x1_t __ret_436; \ + int64x2_t __s0_436 = __p0_436; \ + __ret_436 = splat_laneq_s64(__s0_436, __p1_436); \ + __ret_436; \ +}) +#else +#define 
vdup_laneq_s64(__p0_437, __p1_437) __extension__ ({ \ + int64x1_t __ret_437; \ + int64x2_t __s0_437 = __p0_437; \ + int64x2_t __rev0_437; __rev0_437 = __builtin_shufflevector(__s0_437, __s0_437, __lane_reverse_128_64); \ + __ret_437 = __noswap_splat_laneq_s64(__rev0_437, __p1_437); \ + __ret_437; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_mf8(__p0_438, __p1_438) __extension__ ({ \ + mfloat8x8_t __ret_438; \ + mfloat8x16_t __s0_438 = __p0_438; \ + __ret_438 = splat_laneq_mf8(__s0_438, __p1_438); \ + __ret_438; \ +}) +#else +#define vdup_laneq_mf8(__p0_439, __p1_439) __extension__ ({ \ + mfloat8x8_t __ret_439; \ + mfloat8x16_t __s0_439 = __p0_439; \ + mfloat8x16_t __rev0_439; __rev0_439 = __builtin_shufflevector(__s0_439, __s0_439, __lane_reverse_128_8); \ + __ret_439 = __noswap_splat_laneq_mf8(__rev0_439, __p1_439); \ + __ret_439 = __builtin_shufflevector(__ret_439, __ret_439, __lane_reverse_64_8); \ + __ret_439; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdup_laneq_s16(__p0_440, __p1_440) __extension__ ({ \ + int16x4_t __ret_440; \ + int16x8_t __s0_440 = __p0_440; \ + __ret_440 = splat_laneq_s16(__s0_440, __p1_440); \ + __ret_440; \ +}) +#else +#define vdup_laneq_s16(__p0_441, __p1_441) __extension__ ({ \ + int16x4_t __ret_441; \ + int16x8_t __s0_441 = __p0_441; \ + int16x8_t __rev0_441; __rev0_441 = __builtin_shufflevector(__s0_441, __s0_441, __lane_reverse_128_16); \ + __ret_441 = __noswap_splat_laneq_s16(__rev0_441, __p1_441); \ + __ret_441 = __builtin_shufflevector(__ret_441, __ret_441, __lane_reverse_64_16); \ + __ret_441; \ +}) +#endif + __ai __attribute__((target("neon"))) poly64x1_t vdup_n_p64(poly64_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t) {__p0}; @@ -50197,7 +51291,7 @@ __ai __attribute__((target("neon"))) poly64x2_t vdupq_n_p64(poly64_t __p0) { __ai __attribute__((target("neon"))) poly64x2_t vdupq_n_p64(poly64_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -50212,7 +51306,7 @@ __ai __attribute__((target("neon"))) float64x2_t vdupq_n_f64(float64_t __p0) { __ai __attribute__((target("neon"))) float64x2_t vdupq_n_f64(float64_t __p0) { float64x2_t __ret; __ret = (float64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -50226,7 +51320,7 @@ __ai __attribute__((target("neon"))) float64x1_t vdup_n_f64(float64_t __p0) { poly64x1_t __ret; \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ - __ret = (poly64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -50234,7 +51328,7 @@ __ai __attribute__((target("neon"))) float64x1_t vdup_n_f64(float64_t __p0) { poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 38)); \ __ret; \ }) #else @@ -50242,10 +51336,10 @@ __ai __attribute__((target("neon"))) float64x1_t vdup_n_f64(float64_t __p0) { poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - 
poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (poly64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -50255,7 +51349,7 @@ __ai __attribute__((target("neon"))) float64x1_t vdup_n_f64(float64_t __p0) { float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ - __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 42)); \ __ret; \ }) #else @@ -50263,10 +51357,10 @@ __ai __attribute__((target("neon"))) float64x1_t vdup_n_f64(float64_t __p0) { float64x2_t __ret; \ float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_vextq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vextq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -50275,35 +51369,35 @@ __ai __attribute__((target("neon"))) float64x1_t vdup_n_f64(float64_t __p0) { float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ - __ret = (float64x1_t) __builtin_neon_vext_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vext_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 10)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) float64x2_t __noswap_vfmaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 42)); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vfma_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vfma_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 10)); return __ret; } #define vfmad_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50311,7 +51405,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ - __ret = (float64_t) __builtin_neon_vfmad_lane_f64(__s0, __s1, (float64x1_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vfmad_lane_f64(__s0, __s1, __s2, __p3)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -50320,7 +51414,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vfmas_lane_f32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -50329,8 +51423,8 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__rev2, __p3); \ + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vfmas_lane_f32(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #define __noswap_vfmas_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50338,7 +51432,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - __ret = (float32_t) __builtin_neon_vfmas_lane_f32(__s0, __s1, (float32x2_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vfmas_lane_f32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #endif @@ -50349,7 +51443,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, 
(int8x8_t)__s2, __p3, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 42)); \ __ret; \ }) #else @@ -50358,10 +51452,10 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__s2, __p3, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_lane_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x8_t, __s2), __p3, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vfmaq_lane_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50369,7 +51463,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 42)); \ __ret; \ }) #endif @@ -50380,7 +51474,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 41)); \ __ret; \ }) #else @@ -50389,11 +51483,11 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, __p3, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_lane_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), __p3, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_vfmaq_lane_f32(__p0, __p1, __p2, 
__p3) __extension__ ({ \ @@ -50401,7 +51495,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_lane_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x8_t)__s2, __p3, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 41)); \ __ret; \ }) #endif @@ -50411,7 +51505,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x1_t __s2 = __p2; \ - __ret = (float64x1_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vfma_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 10)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -50420,7 +51514,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 9)); \ __ret; \ }) #else @@ -50429,11 +51523,11 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, __p3, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), __p3, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define __noswap_vfma_lane_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50441,7 +51535,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x2_t __s2 = __p2; \ - __ret = (float32x2_t) __builtin_neon_vfma_lane_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x8_t)__s2, __p3, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x8_t, __s2), __p3, 9)); \ __ret; \ }) #endif @@ -50452,7 +51546,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, 
(float64x2_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vfmad_laneq_f64(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -50461,8 +51555,8 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (float64x2_t)__rev2, __p3); \ + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vfmad_laneq_f64(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #define __noswap_vfmad_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50470,7 +51564,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64_t __s0 = __p0; \ float64_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - __ret = (float64_t) __builtin_neon_vfmad_laneq_f64(__s0, __s1, (float64x2_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vfmad_laneq_f64(__s0, __s1, __s2, __p3)); \ __ret; \ }) #endif @@ -50481,7 +51575,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vfmas_laneq_f32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -50490,8 +51584,8 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__rev2, __p3); \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vfmas_laneq_f32(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #define __noswap_vfmas_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50499,7 +51593,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32_t __s0 = __p0; \ float32_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - __ret = (float32_t) __builtin_neon_vfmas_laneq_f32(__s0, __s1, (float32x4_t)__s2, __p3); \ + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vfmas_laneq_f32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #endif @@ -50510,7 +51604,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 42)); \ __ret; \ }) #else @@ -50519,11 +51613,11 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = 
(float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vfmaq_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50531,7 +51625,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x2_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - __ret = (float64x2_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vfmaq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 42)); \ __ret; \ }) #endif @@ -50542,7 +51636,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 41)); \ __ret; \ }) #else @@ -50551,11 +51645,11 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 41); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 41)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #define __noswap_vfmaq_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50563,7 +51657,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x4_t __s0 = __p0; \ float32x4_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - __ret = (float32x4_t) __builtin_neon_vfmaq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 41); \ + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vfmaq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, 
__s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 41)); \ __ret; \ }) #endif @@ -50574,7 +51668,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vfma_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 10)); \ __ret; \ }) #else @@ -50583,8 +51677,8 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__rev2, __p3, 10); \ + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vfma_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 10)); \ __ret; \ }) #define __noswap_vfma_laneq_f64(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50592,7 +51686,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ float64x2_t __s2 = __p2; \ - __ret = (float64x1_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vfma_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 10)); \ __ret; \ }) #endif @@ -50603,7 +51697,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 9)); \ __ret; \ }) #else @@ -50612,11 +51706,11 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x16_t)__rev2, __p3, 9); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_laneq_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 9)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #define 
__noswap_vfma_laneq_f32(__p0, __p1, __p2, __p3) __extension__ ({ \ @@ -50624,7 +51718,7 @@ __ai __attribute__((target("neon"))) float64x1_t vfma_f64(float64x1_t __p0, floa float32x2_t __s0 = __p0; \ float32x2_t __s1 = __p1; \ float32x4_t __s2 = __p2; \ - __ret = (float32x2_t) __builtin_neon_vfma_laneq_v((int8x8_t)__s0, (int8x8_t)__s1, (int8x16_t)__s2, __p3, 9); \ + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vfma_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 9)); \ __ret; \ }) #endif @@ -50638,10 +51732,10 @@ __ai __attribute__((target("neon"))) float64x2_t vfmaq_n_f64(float64x2_t __p0, f #else __ai __attribute__((target("neon"))) float64x2_t vfmaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vfmaq_f64(__rev0, __rev1, (float64x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -50660,11 +51754,11 @@ __ai __attribute__((target("neon"))) float64x2_t vfmsq_f64(float64x2_t __p0, flo #else __ai __attribute__((target("neon"))) float64x2_t vfmsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vfmaq_f64(__rev0, -__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -50674,246 +51768,246 @@ __ai __attribute__((target("neon"))) float64x1_t vfms_f64(float64x1_t __p0, floa __ret = vfma_f64(__p0, -__p1, __p2); return __ret; } -#define vfmsd_lane_f64(__p0_426, __p1_426, __p2_426, __p3_426) __extension__ ({ \ - float64_t __ret_426; \ - float64_t __s0_426 = __p0_426; \ - float64_t __s1_426 = __p1_426; \ - float64x1_t __s2_426 = __p2_426; \ - __ret_426 = vfmad_lane_f64(__s0_426, -__s1_426, __s2_426, __p3_426); \ - __ret_426; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vfmss_lane_f32(__p0_427, __p1_427, __p2_427, __p3_427) __extension__ ({ \ - float32_t __ret_427; \ - float32_t __s0_427 = __p0_427; \ - float32_t __s1_427 = __p1_427; \ - float32x2_t __s2_427 = __p2_427; \ - __ret_427 = vfmas_lane_f32(__s0_427, -__s1_427, __s2_427, __p3_427); \ - __ret_427; \ -}) -#else -#define vfmss_lane_f32(__p0_428, __p1_428, __p2_428, __p3_428) __extension__ ({ \ - float32_t __ret_428; \ - float32_t __s0_428 = __p0_428; \ - float32_t __s1_428 = __p1_428; \ - float32x2_t __s2_428 = __p2_428; \ - float32x2_t __rev2_428; __rev2_428 = __builtin_shufflevector(__s2_428, __s2_428, 1, 0); \ - __ret_428 = __noswap_vfmas_lane_f32(__s0_428, -__s1_428, __rev2_428, __p3_428); \ - __ret_428; \ -}) -#endif - 
-#ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f64(__p0_429, __p1_429, __p2_429, __p3_429) __extension__ ({ \ - float64x2_t __ret_429; \ - float64x2_t __s0_429 = __p0_429; \ - float64x2_t __s1_429 = __p1_429; \ - float64x1_t __s2_429 = __p2_429; \ - __ret_429 = vfmaq_lane_f64(__s0_429, -__s1_429, __s2_429, __p3_429); \ - __ret_429; \ -}) -#else -#define vfmsq_lane_f64(__p0_430, __p1_430, __p2_430, __p3_430) __extension__ ({ \ - float64x2_t __ret_430; \ - float64x2_t __s0_430 = __p0_430; \ - float64x2_t __s1_430 = __p1_430; \ - float64x1_t __s2_430 = __p2_430; \ - float64x2_t __rev0_430; __rev0_430 = __builtin_shufflevector(__s0_430, __s0_430, 1, 0); \ - float64x2_t __rev1_430; __rev1_430 = __builtin_shufflevector(__s1_430, __s1_430, 1, 0); \ - __ret_430 = __noswap_vfmaq_lane_f64(__rev0_430, -__rev1_430, __s2_430, __p3_430); \ - __ret_430 = __builtin_shufflevector(__ret_430, __ret_430, 1, 0); \ - __ret_430; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f32(__p0_431, __p1_431, __p2_431, __p3_431) __extension__ ({ \ - float32x4_t __ret_431; \ - float32x4_t __s0_431 = __p0_431; \ - float32x4_t __s1_431 = __p1_431; \ - float32x2_t __s2_431 = __p2_431; \ - __ret_431 = vfmaq_lane_f32(__s0_431, -__s1_431, __s2_431, __p3_431); \ - __ret_431; \ -}) -#else -#define vfmsq_lane_f32(__p0_432, __p1_432, __p2_432, __p3_432) __extension__ ({ \ - float32x4_t __ret_432; \ - float32x4_t __s0_432 = __p0_432; \ - float32x4_t __s1_432 = __p1_432; \ - float32x2_t __s2_432 = __p2_432; \ - float32x4_t __rev0_432; __rev0_432 = __builtin_shufflevector(__s0_432, __s0_432, 3, 2, 1, 0); \ - float32x4_t __rev1_432; __rev1_432 = __builtin_shufflevector(__s1_432, __s1_432, 3, 2, 1, 0); \ - float32x2_t __rev2_432; __rev2_432 = __builtin_shufflevector(__s2_432, __s2_432, 1, 0); \ - __ret_432 = __noswap_vfmaq_lane_f32(__rev0_432, -__rev1_432, __rev2_432, __p3_432); \ - __ret_432 = __builtin_shufflevector(__ret_432, __ret_432, 3, 2, 1, 0); \ - __ret_432; \ -}) -#endif - -#define vfms_lane_f64(__p0_433, __p1_433, __p2_433, __p3_433) __extension__ ({ \ - float64x1_t __ret_433; \ - float64x1_t __s0_433 = __p0_433; \ - float64x1_t __s1_433 = __p1_433; \ - float64x1_t __s2_433 = __p2_433; \ - __ret_433 = vfma_lane_f64(__s0_433, -__s1_433, __s2_433, __p3_433); \ - __ret_433; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f32(__p0_434, __p1_434, __p2_434, __p3_434) __extension__ ({ \ - float32x2_t __ret_434; \ - float32x2_t __s0_434 = __p0_434; \ - float32x2_t __s1_434 = __p1_434; \ - float32x2_t __s2_434 = __p2_434; \ - __ret_434 = vfma_lane_f32(__s0_434, -__s1_434, __s2_434, __p3_434); \ - __ret_434; \ -}) -#else -#define vfms_lane_f32(__p0_435, __p1_435, __p2_435, __p3_435) __extension__ ({ \ - float32x2_t __ret_435; \ - float32x2_t __s0_435 = __p0_435; \ - float32x2_t __s1_435 = __p1_435; \ - float32x2_t __s2_435 = __p2_435; \ - float32x2_t __rev0_435; __rev0_435 = __builtin_shufflevector(__s0_435, __s0_435, 1, 0); \ - float32x2_t __rev1_435; __rev1_435 = __builtin_shufflevector(__s1_435, __s1_435, 1, 0); \ - float32x2_t __rev2_435; __rev2_435 = __builtin_shufflevector(__s2_435, __s2_435, 1, 0); \ - __ret_435 = __noswap_vfma_lane_f32(__rev0_435, -__rev1_435, __rev2_435, __p3_435); \ - __ret_435 = __builtin_shufflevector(__ret_435, __ret_435, 1, 0); \ - __ret_435; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsd_laneq_f64(__p0_436, __p1_436, __p2_436, __p3_436) __extension__ ({ \ - float64_t __ret_436; \ - float64_t __s0_436 = __p0_436; \ - float64_t __s1_436 = __p1_436; \ - float64x2_t __s2_436 = 
__p2_436; \ - __ret_436 = vfmad_laneq_f64(__s0_436, -__s1_436, __s2_436, __p3_436); \ - __ret_436; \ -}) -#else -#define vfmsd_laneq_f64(__p0_437, __p1_437, __p2_437, __p3_437) __extension__ ({ \ - float64_t __ret_437; \ - float64_t __s0_437 = __p0_437; \ - float64_t __s1_437 = __p1_437; \ - float64x2_t __s2_437 = __p2_437; \ - float64x2_t __rev2_437; __rev2_437 = __builtin_shufflevector(__s2_437, __s2_437, 1, 0); \ - __ret_437 = __noswap_vfmad_laneq_f64(__s0_437, -__s1_437, __rev2_437, __p3_437); \ - __ret_437; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmss_laneq_f32(__p0_438, __p1_438, __p2_438, __p3_438) __extension__ ({ \ - float32_t __ret_438; \ - float32_t __s0_438 = __p0_438; \ - float32_t __s1_438 = __p1_438; \ - float32x4_t __s2_438 = __p2_438; \ - __ret_438 = vfmas_laneq_f32(__s0_438, -__s1_438, __s2_438, __p3_438); \ - __ret_438; \ -}) -#else -#define vfmss_laneq_f32(__p0_439, __p1_439, __p2_439, __p3_439) __extension__ ({ \ - float32_t __ret_439; \ - float32_t __s0_439 = __p0_439; \ - float32_t __s1_439 = __p1_439; \ - float32x4_t __s2_439 = __p2_439; \ - float32x4_t __rev2_439; __rev2_439 = __builtin_shufflevector(__s2_439, __s2_439, 3, 2, 1, 0); \ - __ret_439 = __noswap_vfmas_laneq_f32(__s0_439, -__s1_439, __rev2_439, __p3_439); \ - __ret_439; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f64(__p0_440, __p1_440, __p2_440, __p3_440) __extension__ ({ \ - float64x2_t __ret_440; \ - float64x2_t __s0_440 = __p0_440; \ - float64x2_t __s1_440 = __p1_440; \ - float64x2_t __s2_440 = __p2_440; \ - __ret_440 = vfmaq_laneq_f64(__s0_440, -__s1_440, __s2_440, __p3_440); \ - __ret_440; \ -}) -#else -#define vfmsq_laneq_f64(__p0_441, __p1_441, __p2_441, __p3_441) __extension__ ({ \ - float64x2_t __ret_441; \ - float64x2_t __s0_441 = __p0_441; \ - float64x2_t __s1_441 = __p1_441; \ - float64x2_t __s2_441 = __p2_441; \ - float64x2_t __rev0_441; __rev0_441 = __builtin_shufflevector(__s0_441, __s0_441, 1, 0); \ - float64x2_t __rev1_441; __rev1_441 = __builtin_shufflevector(__s1_441, __s1_441, 1, 0); \ - float64x2_t __rev2_441; __rev2_441 = __builtin_shufflevector(__s2_441, __s2_441, 1, 0); \ - __ret_441 = __noswap_vfmaq_laneq_f64(__rev0_441, -__rev1_441, __rev2_441, __p3_441); \ - __ret_441 = __builtin_shufflevector(__ret_441, __ret_441, 1, 0); \ - __ret_441; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f32(__p0_442, __p1_442, __p2_442, __p3_442) __extension__ ({ \ - float32x4_t __ret_442; \ - float32x4_t __s0_442 = __p0_442; \ - float32x4_t __s1_442 = __p1_442; \ - float32x4_t __s2_442 = __p2_442; \ - __ret_442 = vfmaq_laneq_f32(__s0_442, -__s1_442, __s2_442, __p3_442); \ +#define vfmsd_lane_f64(__p0_442, __p1_442, __p2_442, __p3_442) __extension__ ({ \ + float64_t __ret_442; \ + float64_t __s0_442 = __p0_442; \ + float64_t __s1_442 = __p1_442; \ + float64x1_t __s2_442 = __p2_442; \ + __ret_442 = vfmad_lane_f64(__s0_442, -__s1_442, __s2_442, __p3_442); \ __ret_442; \ }) -#else -#define vfmsq_laneq_f32(__p0_443, __p1_443, __p2_443, __p3_443) __extension__ ({ \ - float32x4_t __ret_443; \ - float32x4_t __s0_443 = __p0_443; \ - float32x4_t __s1_443 = __p1_443; \ - float32x4_t __s2_443 = __p2_443; \ - float32x4_t __rev0_443; __rev0_443 = __builtin_shufflevector(__s0_443, __s0_443, 3, 2, 1, 0); \ - float32x4_t __rev1_443; __rev1_443 = __builtin_shufflevector(__s1_443, __s1_443, 3, 2, 1, 0); \ - float32x4_t __rev2_443; __rev2_443 = __builtin_shufflevector(__s2_443, __s2_443, 3, 2, 1, 0); \ - __ret_443 = __noswap_vfmaq_laneq_f32(__rev0_443, -__rev1_443, 
__rev2_443, __p3_443); \ - __ret_443 = __builtin_shufflevector(__ret_443, __ret_443, 3, 2, 1, 0); \ +#ifdef __LITTLE_ENDIAN__ +#define vfmss_lane_f32(__p0_443, __p1_443, __p2_443, __p3_443) __extension__ ({ \ + float32_t __ret_443; \ + float32_t __s0_443 = __p0_443; \ + float32_t __s1_443 = __p1_443; \ + float32x2_t __s2_443 = __p2_443; \ + __ret_443 = vfmas_lane_f32(__s0_443, -__s1_443, __s2_443, __p3_443); \ __ret_443; \ }) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f64(__p0_444, __p1_444, __p2_444, __p3_444) __extension__ ({ \ - float64x1_t __ret_444; \ - float64x1_t __s0_444 = __p0_444; \ - float64x1_t __s1_444 = __p1_444; \ - float64x2_t __s2_444 = __p2_444; \ - __ret_444 = vfma_laneq_f64(__s0_444, -__s1_444, __s2_444, __p3_444); \ +#else +#define vfmss_lane_f32(__p0_444, __p1_444, __p2_444, __p3_444) __extension__ ({ \ + float32_t __ret_444; \ + float32_t __s0_444 = __p0_444; \ + float32_t __s1_444 = __p1_444; \ + float32x2_t __s2_444 = __p2_444; \ + float32x2_t __rev2_444; __rev2_444 = __builtin_shufflevector(__s2_444, __s2_444, __lane_reverse_64_32); \ + __ret_444 = __noswap_vfmas_lane_f32(__s0_444, -__s1_444, __rev2_444, __p3_444); \ __ret_444; \ }) -#else -#define vfms_laneq_f64(__p0_445, __p1_445, __p2_445, __p3_445) __extension__ ({ \ - float64x1_t __ret_445; \ - float64x1_t __s0_445 = __p0_445; \ - float64x1_t __s1_445 = __p1_445; \ - float64x2_t __s2_445 = __p2_445; \ - float64x2_t __rev2_445; __rev2_445 = __builtin_shufflevector(__s2_445, __s2_445, 1, 0); \ - __ret_445 = __noswap_vfma_laneq_f64(__s0_445, -__s1_445, __rev2_445, __p3_445); \ +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmsq_lane_f64(__p0_445, __p1_445, __p2_445, __p3_445) __extension__ ({ \ + float64x2_t __ret_445; \ + float64x2_t __s0_445 = __p0_445; \ + float64x2_t __s1_445 = __p1_445; \ + float64x1_t __s2_445 = __p2_445; \ + __ret_445 = vfmaq_lane_f64(__s0_445, -__s1_445, __s2_445, __p3_445); \ __ret_445; \ }) +#else +#define vfmsq_lane_f64(__p0_446, __p1_446, __p2_446, __p3_446) __extension__ ({ \ + float64x2_t __ret_446; \ + float64x2_t __s0_446 = __p0_446; \ + float64x2_t __s1_446 = __p1_446; \ + float64x1_t __s2_446 = __p2_446; \ + float64x2_t __rev0_446; __rev0_446 = __builtin_shufflevector(__s0_446, __s0_446, __lane_reverse_128_64); \ + float64x2_t __rev1_446; __rev1_446 = __builtin_shufflevector(__s1_446, __s1_446, __lane_reverse_128_64); \ + __ret_446 = __noswap_vfmaq_lane_f64(__rev0_446, -__rev1_446, __s2_446, __p3_446); \ + __ret_446 = __builtin_shufflevector(__ret_446, __ret_446, __lane_reverse_128_64); \ + __ret_446; \ +}) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f32(__p0_446, __p1_446, __p2_446, __p3_446) __extension__ ({ \ - float32x2_t __ret_446; \ - float32x2_t __s0_446 = __p0_446; \ - float32x2_t __s1_446 = __p1_446; \ - float32x4_t __s2_446 = __p2_446; \ - __ret_446 = vfma_laneq_f32(__s0_446, -__s1_446, __s2_446, __p3_446); \ - __ret_446; \ +#define vfmsq_lane_f32(__p0_447, __p1_447, __p2_447, __p3_447) __extension__ ({ \ + float32x4_t __ret_447; \ + float32x4_t __s0_447 = __p0_447; \ + float32x4_t __s1_447 = __p1_447; \ + float32x2_t __s2_447 = __p2_447; \ + __ret_447 = vfmaq_lane_f32(__s0_447, -__s1_447, __s2_447, __p3_447); \ + __ret_447; \ }) #else -#define vfms_laneq_f32(__p0_447, __p1_447, __p2_447, __p3_447) __extension__ ({ \ - float32x2_t __ret_447; \ - float32x2_t __s0_447 = __p0_447; \ - float32x2_t __s1_447 = __p1_447; \ - float32x4_t __s2_447 = __p2_447; \ - float32x2_t __rev0_447; __rev0_447 = __builtin_shufflevector(__s0_447, __s0_447, 1, 0); \ - 
float32x2_t __rev1_447; __rev1_447 = __builtin_shufflevector(__s1_447, __s1_447, 1, 0); \ - float32x4_t __rev2_447; __rev2_447 = __builtin_shufflevector(__s2_447, __s2_447, 3, 2, 1, 0); \ - __ret_447 = __noswap_vfma_laneq_f32(__rev0_447, -__rev1_447, __rev2_447, __p3_447); \ - __ret_447 = __builtin_shufflevector(__ret_447, __ret_447, 1, 0); \ - __ret_447; \ +#define vfmsq_lane_f32(__p0_448, __p1_448, __p2_448, __p3_448) __extension__ ({ \ + float32x4_t __ret_448; \ + float32x4_t __s0_448 = __p0_448; \ + float32x4_t __s1_448 = __p1_448; \ + float32x2_t __s2_448 = __p2_448; \ + float32x4_t __rev0_448; __rev0_448 = __builtin_shufflevector(__s0_448, __s0_448, __lane_reverse_128_32); \ + float32x4_t __rev1_448; __rev1_448 = __builtin_shufflevector(__s1_448, __s1_448, __lane_reverse_128_32); \ + float32x2_t __rev2_448; __rev2_448 = __builtin_shufflevector(__s2_448, __s2_448, __lane_reverse_64_32); \ + __ret_448 = __noswap_vfmaq_lane_f32(__rev0_448, -__rev1_448, __rev2_448, __p3_448); \ + __ret_448 = __builtin_shufflevector(__ret_448, __ret_448, __lane_reverse_128_32); \ + __ret_448; \ +}) +#endif + +#define vfms_lane_f64(__p0_449, __p1_449, __p2_449, __p3_449) __extension__ ({ \ + float64x1_t __ret_449; \ + float64x1_t __s0_449 = __p0_449; \ + float64x1_t __s1_449 = __p1_449; \ + float64x1_t __s2_449 = __p2_449; \ + __ret_449 = vfma_lane_f64(__s0_449, -__s1_449, __s2_449, __p3_449); \ + __ret_449; \ +}) +#ifdef __LITTLE_ENDIAN__ +#define vfms_lane_f32(__p0_450, __p1_450, __p2_450, __p3_450) __extension__ ({ \ + float32x2_t __ret_450; \ + float32x2_t __s0_450 = __p0_450; \ + float32x2_t __s1_450 = __p1_450; \ + float32x2_t __s2_450 = __p2_450; \ + __ret_450 = vfma_lane_f32(__s0_450, -__s1_450, __s2_450, __p3_450); \ + __ret_450; \ +}) +#else +#define vfms_lane_f32(__p0_451, __p1_451, __p2_451, __p3_451) __extension__ ({ \ + float32x2_t __ret_451; \ + float32x2_t __s0_451 = __p0_451; \ + float32x2_t __s1_451 = __p1_451; \ + float32x2_t __s2_451 = __p2_451; \ + float32x2_t __rev0_451; __rev0_451 = __builtin_shufflevector(__s0_451, __s0_451, __lane_reverse_64_32); \ + float32x2_t __rev1_451; __rev1_451 = __builtin_shufflevector(__s1_451, __s1_451, __lane_reverse_64_32); \ + float32x2_t __rev2_451; __rev2_451 = __builtin_shufflevector(__s2_451, __s2_451, __lane_reverse_64_32); \ + __ret_451 = __noswap_vfma_lane_f32(__rev0_451, -__rev1_451, __rev2_451, __p3_451); \ + __ret_451 = __builtin_shufflevector(__ret_451, __ret_451, __lane_reverse_64_32); \ + __ret_451; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmsd_laneq_f64(__p0_452, __p1_452, __p2_452, __p3_452) __extension__ ({ \ + float64_t __ret_452; \ + float64_t __s0_452 = __p0_452; \ + float64_t __s1_452 = __p1_452; \ + float64x2_t __s2_452 = __p2_452; \ + __ret_452 = vfmad_laneq_f64(__s0_452, -__s1_452, __s2_452, __p3_452); \ + __ret_452; \ +}) +#else +#define vfmsd_laneq_f64(__p0_453, __p1_453, __p2_453, __p3_453) __extension__ ({ \ + float64_t __ret_453; \ + float64_t __s0_453 = __p0_453; \ + float64_t __s1_453 = __p1_453; \ + float64x2_t __s2_453 = __p2_453; \ + float64x2_t __rev2_453; __rev2_453 = __builtin_shufflevector(__s2_453, __s2_453, __lane_reverse_128_64); \ + __ret_453 = __noswap_vfmad_laneq_f64(__s0_453, -__s1_453, __rev2_453, __p3_453); \ + __ret_453; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmss_laneq_f32(__p0_454, __p1_454, __p2_454, __p3_454) __extension__ ({ \ + float32_t __ret_454; \ + float32_t __s0_454 = __p0_454; \ + float32_t __s1_454 = __p1_454; \ + float32x4_t __s2_454 = __p2_454; \ + __ret_454 = 
vfmas_laneq_f32(__s0_454, -__s1_454, __s2_454, __p3_454); \ + __ret_454; \ +}) +#else +#define vfmss_laneq_f32(__p0_455, __p1_455, __p2_455, __p3_455) __extension__ ({ \ + float32_t __ret_455; \ + float32_t __s0_455 = __p0_455; \ + float32_t __s1_455 = __p1_455; \ + float32x4_t __s2_455 = __p2_455; \ + float32x4_t __rev2_455; __rev2_455 = __builtin_shufflevector(__s2_455, __s2_455, __lane_reverse_128_32); \ + __ret_455 = __noswap_vfmas_laneq_f32(__s0_455, -__s1_455, __rev2_455, __p3_455); \ + __ret_455; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmsq_laneq_f64(__p0_456, __p1_456, __p2_456, __p3_456) __extension__ ({ \ + float64x2_t __ret_456; \ + float64x2_t __s0_456 = __p0_456; \ + float64x2_t __s1_456 = __p1_456; \ + float64x2_t __s2_456 = __p2_456; \ + __ret_456 = vfmaq_laneq_f64(__s0_456, -__s1_456, __s2_456, __p3_456); \ + __ret_456; \ +}) +#else +#define vfmsq_laneq_f64(__p0_457, __p1_457, __p2_457, __p3_457) __extension__ ({ \ + float64x2_t __ret_457; \ + float64x2_t __s0_457 = __p0_457; \ + float64x2_t __s1_457 = __p1_457; \ + float64x2_t __s2_457 = __p2_457; \ + float64x2_t __rev0_457; __rev0_457 = __builtin_shufflevector(__s0_457, __s0_457, __lane_reverse_128_64); \ + float64x2_t __rev1_457; __rev1_457 = __builtin_shufflevector(__s1_457, __s1_457, __lane_reverse_128_64); \ + float64x2_t __rev2_457; __rev2_457 = __builtin_shufflevector(__s2_457, __s2_457, __lane_reverse_128_64); \ + __ret_457 = __noswap_vfmaq_laneq_f64(__rev0_457, -__rev1_457, __rev2_457, __p3_457); \ + __ret_457 = __builtin_shufflevector(__ret_457, __ret_457, __lane_reverse_128_64); \ + __ret_457; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmsq_laneq_f32(__p0_458, __p1_458, __p2_458, __p3_458) __extension__ ({ \ + float32x4_t __ret_458; \ + float32x4_t __s0_458 = __p0_458; \ + float32x4_t __s1_458 = __p1_458; \ + float32x4_t __s2_458 = __p2_458; \ + __ret_458 = vfmaq_laneq_f32(__s0_458, -__s1_458, __s2_458, __p3_458); \ + __ret_458; \ +}) +#else +#define vfmsq_laneq_f32(__p0_459, __p1_459, __p2_459, __p3_459) __extension__ ({ \ + float32x4_t __ret_459; \ + float32x4_t __s0_459 = __p0_459; \ + float32x4_t __s1_459 = __p1_459; \ + float32x4_t __s2_459 = __p2_459; \ + float32x4_t __rev0_459; __rev0_459 = __builtin_shufflevector(__s0_459, __s0_459, __lane_reverse_128_32); \ + float32x4_t __rev1_459; __rev1_459 = __builtin_shufflevector(__s1_459, __s1_459, __lane_reverse_128_32); \ + float32x4_t __rev2_459; __rev2_459 = __builtin_shufflevector(__s2_459, __s2_459, __lane_reverse_128_32); \ + __ret_459 = __noswap_vfmaq_laneq_f32(__rev0_459, -__rev1_459, __rev2_459, __p3_459); \ + __ret_459 = __builtin_shufflevector(__ret_459, __ret_459, __lane_reverse_128_32); \ + __ret_459; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfms_laneq_f64(__p0_460, __p1_460, __p2_460, __p3_460) __extension__ ({ \ + float64x1_t __ret_460; \ + float64x1_t __s0_460 = __p0_460; \ + float64x1_t __s1_460 = __p1_460; \ + float64x2_t __s2_460 = __p2_460; \ + __ret_460 = vfma_laneq_f64(__s0_460, -__s1_460, __s2_460, __p3_460); \ + __ret_460; \ +}) +#else +#define vfms_laneq_f64(__p0_461, __p1_461, __p2_461, __p3_461) __extension__ ({ \ + float64x1_t __ret_461; \ + float64x1_t __s0_461 = __p0_461; \ + float64x1_t __s1_461 = __p1_461; \ + float64x2_t __s2_461 = __p2_461; \ + float64x2_t __rev2_461; __rev2_461 = __builtin_shufflevector(__s2_461, __s2_461, __lane_reverse_128_64); \ + __ret_461 = __noswap_vfma_laneq_f64(__s0_461, -__s1_461, __rev2_461, __p3_461); \ + __ret_461; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define 
vfms_laneq_f32(__p0_462, __p1_462, __p2_462, __p3_462) __extension__ ({ \ + float32x2_t __ret_462; \ + float32x2_t __s0_462 = __p0_462; \ + float32x2_t __s1_462 = __p1_462; \ + float32x4_t __s2_462 = __p2_462; \ + __ret_462 = vfma_laneq_f32(__s0_462, -__s1_462, __s2_462, __p3_462); \ + __ret_462; \ +}) +#else +#define vfms_laneq_f32(__p0_463, __p1_463, __p2_463, __p3_463) __extension__ ({ \ + float32x2_t __ret_463; \ + float32x2_t __s0_463 = __p0_463; \ + float32x2_t __s1_463 = __p1_463; \ + float32x4_t __s2_463 = __p2_463; \ + float32x2_t __rev0_463; __rev0_463 = __builtin_shufflevector(__s0_463, __s0_463, __lane_reverse_64_32); \ + float32x2_t __rev1_463; __rev1_463 = __builtin_shufflevector(__s1_463, __s1_463, __lane_reverse_64_32); \ + float32x4_t __rev2_463; __rev2_463 = __builtin_shufflevector(__s2_463, __s2_463, __lane_reverse_128_32); \ + __ret_463 = __noswap_vfma_laneq_f32(__rev0_463, -__rev1_463, __rev2_463, __p3_463); \ + __ret_463 = __builtin_shufflevector(__ret_463, __ret_463, __lane_reverse_64_32); \ + __ret_463; \ }) #endif @@ -50926,10 +52020,10 @@ __ai __attribute__((target("neon"))) float64x2_t vfmsq_n_f64(float64x2_t __p0, f #else __ai __attribute__((target("neon"))) float64x2_t vfmsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vfmaq_f64(__rev0, -__rev1, (float64x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -50943,10 +52037,10 @@ __ai __attribute__((target("neon"))) float32x4_t vfmsq_n_f32(float32x4_t __p0, f #else __ai __attribute__((target("neon"))) float32x4_t vfmsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vfmaq_f32(__rev0, -__rev1, (float32x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -50965,10 +52059,10 @@ __ai __attribute__((target("neon"))) float32x2_t vfms_n_f32(float32x2_t __p0, fl #else __ai __attribute__((target("neon"))) float32x2_t vfms_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vfma_f32(__rev0, -__rev1, (float32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -50982,7 +52076,7 @@ __ai __attribute__((target("neon"))) poly64x1_t 
vget_high_p64(poly64x2_t __p0) { #else __ai __attribute__((target("neon"))) poly64x1_t vget_high_p64(poly64x2_t __p0) { poly64x1_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } @@ -51002,7 +52096,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0) #else __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0) { float64x1_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 1); return __ret; } @@ -51011,28 +52105,28 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0) #define vget_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x1_t __s0 = __p0; \ - __ret = (poly64_t) __builtin_neon_vget_lane_i64((poly64x1_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vget_lane_i64(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__s0, __p1)); \ __ret; \ }) #else #define vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__rev0, __p1); \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__rev0, __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_p64(__p0, __p1) __extension__ ({ \ poly64_t __ret; \ poly64x2_t __s0 = __p0; \ - __ret = (poly64_t) __builtin_neon_vgetq_lane_i64((poly64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(poly64_t, __builtin_neon_vgetq_lane_i64(__s0, __p1)); \ __ret; \ }) #endif @@ -51041,21 +52135,21 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0) #define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vgetq_lane_f64(__s0, __p1)); \ __ret; \ }) #else #define vgetq_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__rev0, __p1); \ + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vgetq_lane_f64(__rev0, __p1)); \ __ret; \ }) #define __noswap_vgetq_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x2_t __s0 = __p0; \ - __ret = (float64_t) __builtin_neon_vgetq_lane_f64((float64x2_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vgetq_lane_f64(__s0, __p1)); \ __ret; \ }) #endif @@ -51063,7 +52157,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_high_f64(float64x2_t __p0) #define vget_lane_f64(__p0, __p1) __extension__ ({ \ float64_t __ret; \ float64x1_t 
__s0 = __p0; \ - __ret = (float64_t) __builtin_neon_vget_lane_f64((float64x1_t)__s0, __p1); \ + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vget_lane_f64(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -51075,7 +52169,7 @@ __ai __attribute__((target("neon"))) poly64x1_t vget_low_p64(poly64x2_t __p0) { #else __ai __attribute__((target("neon"))) poly64x1_t vget_low_p64(poly64x2_t __p0) { poly64x1_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } @@ -51090,7 +52184,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #else __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) { float64x1_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev0, 0); return __ret; } @@ -51098,20 +52192,20 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld1_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_v(__p0, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vld1_v(__p0, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_v(__p0, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vld1q_v(__p0, 38)); \ __ret; \ }) #else #define vld1q_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_v(__p0, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vld1q_v(__p0, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51119,39 +52213,69 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #ifdef __LITTLE_ENDIAN__ #define vld1q_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vld1q_v(__p0, 42)); \ __ret; \ }) #else #define vld1q_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_v(__p0, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vld1q_v(__p0, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1q_mf8(__p0) __extension__ ({ \ + mfloat8x16_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vld1q_v(__p0, 44)); \ + __ret; \ +}) +#else +#define vld1q_mf8(__p0) __extension__ ({ \ + mfloat8x16_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vld1q_v(__p0, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif #define vld1_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_v(__p0, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vld1_v(__p0, 10)); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld1_mf8(__p0) __extension__ ({ \ + mfloat8x8_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vld1_v(__p0, 12)); \ + __ret; \ +}) 
+#else +#define vld1_mf8(__p0) __extension__ ({ \ + mfloat8x8_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vld1_v(__p0, 12)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld1_dup_p64(__p0) __extension__ ({ \ poly64x1_t __ret; \ - __ret = (poly64x1_t) __builtin_neon_vld1_dup_v(__p0, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vld1_dup_v(__p0, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_dup_v(__p0, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vld1q_dup_v(__p0, 38)); \ __ret; \ }) #else #define vld1q_dup_p64(__p0) __extension__ ({ \ poly64x2_t __ret; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_dup_v(__p0, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vld1q_dup_v(__p0, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51159,43 +52283,73 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #ifdef __LITTLE_ENDIAN__ #define vld1q_dup_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vld1q_dup_v(__p0, 42)); \ __ret; \ }) #else #define vld1q_dup_f64(__p0) __extension__ ({ \ float64x2_t __ret; \ - __ret = (float64x2_t) __builtin_neon_vld1q_dup_v(__p0, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vld1q_dup_v(__p0, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vld1q_dup_v(__p0, 44)); \ + __ret; \ +}) +#else +#define vld1q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vld1q_dup_v(__p0, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif #define vld1_dup_f64(__p0) __extension__ ({ \ float64x1_t __ret; \ - __ret = (float64x1_t) __builtin_neon_vld1_dup_v(__p0, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vld1_dup_v(__p0, 10)); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld1_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vld1_dup_v(__p0, 12)); \ + __ret; \ +}) +#else +#define vld1_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8_t __ret; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vld1_dup_v(__p0, 12)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s1 = __p1; \ - __ret = (poly64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ - __ret = (poly64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, 
__builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 38)); \ __ret; \ }) #else #define vld1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (poly64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51204,16 +52358,34 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s1 = __p1; \ - __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 42)); \ __ret; \ }) #else #define vld1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_vld1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 44)); \ + __ret; \ +}) +#else +#define vld1q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __ret; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vld1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 44)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51221,9 +52393,27 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s1 = __p1; \ - __ret = (float64x1_t) __builtin_neon_vld1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 10)); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld1_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x8_t __s1 = __p1; \ + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 12)); \ + __ret; \ +}) +#else +#define vld1_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __ret; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __ret = __builtin_bit_cast(mfloat8x8_t, 
__builtin_neon_vld1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 12)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld1_p64_x2(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld1_x2_v(&__ret, __p0, 6); \ @@ -51240,8 +52430,8 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51257,8 +52447,25 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x2_t __ret; \ __builtin_neon_vld1q_x2_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1q_mf8_x2(__p0) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld1q_mf8_x2(__p0) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + __builtin_neon_vld1q_x2_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51268,6 +52475,23 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld1_x2_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld1_mf8_x2(__p0) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld1_mf8_x2(__p0) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + __builtin_neon_vld1_x2_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld1_p64_x3(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld1_x3_v(&__ret, __p0, 6); \ @@ -51284,9 +52508,9 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51302,9 +52526,27 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x3_t __ret; \ __builtin_neon_vld1q_x3_v(&__ret, __p0, 42); \ \ - 
__ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1q_mf8_x3(__p0) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld1q_mf8_x3(__p0) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + __builtin_neon_vld1q_x3_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51314,6 +52556,24 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld1_x3_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld1_mf8_x3(__p0) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld1_mf8_x3(__p0) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + __builtin_neon_vld1_x3_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld1_p64_x4(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld1_x4_v(&__ret, __p0, 6); \ @@ -51330,10 +52590,10 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51349,10 +52609,29 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x4_t __ret; \ __builtin_neon_vld1q_x4_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 
__lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld1q_mf8_x4(__p0) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld1q_mf8_x4(__p0) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + __builtin_neon_vld1q_x4_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51362,6 +52641,25 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld1_x4_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld1_mf8_x4(__p0) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld1_mf8_x4(__p0) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + __builtin_neon_vld1_x4_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld2_p64(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld2_v(&__ret, __p0, 6); \ @@ -51378,8 +52676,8 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51395,8 +52693,8 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51412,8 +52710,8 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51429,8 +52727,25 @@ __ai 
__attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int64x2x2_t __ret; \ __builtin_neon_vld2q_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld2q_mf8(__p0) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld2q_mf8(__p0) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + __builtin_neon_vld2q_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51440,6 +52755,23 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld2_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld2_mf8(__p0) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld2_mf8(__p0) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + __builtin_neon_vld2_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld2_dup_p64(__p0) __extension__ ({ \ poly64x1x2_t __ret; \ __builtin_neon_vld2_dup_v(&__ret, __p0, 6); \ @@ -51456,8 +52788,8 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51473,8 +52805,25 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x2_t __ret; \ __builtin_neon_vld2q_dup_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld2q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld2q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + __builtin_neon_vld2q_dup_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51484,17 +52833,34 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld2_dup_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ 
+#define vld2_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + __builtin_neon_vld2_dup_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld2_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + __builtin_neon_vld2_dup_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x2_t __ret; \ poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __ret; \ poly8x16x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 36); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 36); \ __ret; \ }) #else @@ -51502,12 +52868,12 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly8x16x2_t __ret; \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51516,7 +52882,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld2q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x2_t __ret; \ poly64x2x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 38); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 38); \ __ret; \ }) #else @@ -51524,12 +52890,12 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x2_t __ret; \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 38); \ + __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51538,7 +52904,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x2_t __ret; \ uint8x16x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 48); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 48); \ __ret; \ }) #else @@ -51546,12 +52912,12 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint8x16x2_t __ret; \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51560,7 +52926,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld2q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x2_t __ret; \ uint64x2x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 51); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 51); \ __ret; \ }) #else @@ -51568,12 +52934,12 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint64x2x2_t __ret; \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51582,7 +52948,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x2_t __ret; \ int8x16x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 32); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 32); \ __ret; \ }) #else @@ -51590,12 +52956,12 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int8x16x2_t __ret; \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51604,7 +52970,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __ret; \ float64x2x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 42); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 42); \ __ret; \ }) #else @@ -51612,12 +52978,12 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x2_t __ret; \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + 
__builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51626,7 +52992,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __ret; \ int64x2x2_t __s1 = __p1; \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 35); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 35); \ __ret; \ }) #else @@ -51634,12 +53000,34 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int64x2x2_t __ret; \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vld2q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld2q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + mfloat8x16x2_t __s1 = __p1; \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 44); \ + __ret; \ +}) +#else +#define vld2q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x2_t __ret; \ + mfloat8x16x2_t __s1 = __p1; \ + mfloat8x16x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vld2q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51647,21 +53035,43 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x2_t __ret; \ uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), 
__builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 19); \ __ret; \ }) #define vld2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x2_t __ret; \ float64x1x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 10); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 10); \ __ret; \ }) #define vld2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x2_t __ret; \ int64x1x2_t __s1 = __p1; \ - __builtin_neon_vld2_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 3); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 3); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld2_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + mfloat8x8x2_t __s1 = __p1; \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 12); \ + __ret; \ +}) +#else +#define vld2_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x2_t __ret; \ + mfloat8x8x2_t __s1 = __p1; \ + mfloat8x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vld2_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld3_p64(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld3_v(&__ret, __p0, 6); \ @@ -51678,9 +53088,9 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51696,9 +53106,9 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51714,9 +53124,9 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], 
__ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51732,9 +53142,27 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int64x2x3_t __ret; \ __builtin_neon_vld3q_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_mf8(__p0) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + __builtin_neon_vld3q_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld3q_mf8(__p0) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + __builtin_neon_vld3q_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51744,6 +53172,24 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld3_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld3_mf8(__p0) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld3_mf8(__p0) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + __builtin_neon_vld3_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld3_dup_p64(__p0) __extension__ ({ \ poly64x1x3_t __ret; \ __builtin_neon_vld3_dup_v(&__ret, __p0, 6); \ @@ -51760,9 +53206,9 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51778,9 +53224,27 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x3_t __ret; \ __builtin_neon_vld3q_dup_v(&__ret, __p0, 42); \ \ - __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld3q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + __builtin_neon_vld3q_dup_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51790,17 +53254,35 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld3_dup_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld3_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld3_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + __builtin_neon_vld3_dup_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x3_t __ret; \ poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __ret; \ poly8x16x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 36); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 36); \ __ret; \ }) #else @@ -51808,14 +53290,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly8x16x3_t __ret; \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 
__lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51824,7 +53306,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld3q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x3_t __ret; \ poly64x2x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 38); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 38); \ __ret; \ }) #else @@ -51832,14 +53314,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x3_t __ret; \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51848,7 +53330,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld3q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x3_t __ret; \ uint8x16x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 48); \ + 
__builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 48); \ __ret; \ }) #else @@ -51856,14 +53338,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint8x16x3_t __ret; \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51872,7 +53354,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld3q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x3_t __ret; \ uint64x2x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 51); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 51); \ __ret; \ }) #else @@ -51880,14 +53362,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint64x2x3_t __ret; \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, 
__rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51896,7 +53378,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld3q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x3_t __ret; \ int8x16x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 32); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 32); \ __ret; \ }) #else @@ -51904,14 +53386,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int8x16x3_t __ret; \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51920,7 +53402,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __ret; \ float64x2x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 42); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 42); \ __ret; \ }) #else 
@@ -51928,14 +53410,14 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x3_t __ret; \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -51944,7 +53426,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __ret; \ int64x2x3_t __s1 = __p1; \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 35); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 35); \ __ret; \ }) #else @@ -51952,14 +53434,38 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int64x2x3_t __ret; \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vld3q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 
__lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld3q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + mfloat8x16x3_t __s1 = __p1; \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 44); \ + __ret; \ +}) +#else +#define vld3q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x3_t __ret; \ + mfloat8x16x3_t __s1 = __p1; \ + mfloat8x16x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vld3q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -51967,21 +53473,45 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x3_t __ret; \ uint64x1x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 19); \ __ret; \ }) #define vld3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x3_t __ret; \ float64x1x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 10); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 10); \ __ret; \ }) #define vld3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x3_t __ret; \ int64x1x3_t __s1 = __p1; \ - __builtin_neon_vld3_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 3); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 3); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld3_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + mfloat8x8x3_t __s1 = __p1; \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 12); \ + __ret; \ +}) +#else +#define vld3_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x3_t __ret; \ + mfloat8x8x3_t __s1 = __p1; \ + mfloat8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], 
__s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vld3_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld4_p64(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld4_v(&__ret, __p0, 6); \ @@ -51998,10 +53528,10 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -52017,10 +53547,10 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -52036,10 +53566,10 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -52055,10 +53585,29 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int64x2x4_t __ret; \ __builtin_neon_vld4q_v(&__ret, __p0, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - 
__ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_mf8(__p0) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld4q_mf8(__p0) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + __builtin_neon_vld4q_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -52068,6 +53617,25 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld4_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld4_mf8(__p0) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld4_mf8(__p0) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + __builtin_neon_vld4_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld4_dup_p64(__p0) __extension__ ({ \ poly64x1x4_t __ret; \ __builtin_neon_vld4_dup_v(&__ret, __p0, 6); \ @@ -52084,10 +53652,10 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -52103,10 +53671,29 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x4_t __ret; \ __builtin_neon_vld4q_dup_v(&__ret, __p0, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], 
__ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 44); \ + __ret; \ +}) +#else +#define vld4q_dup_mf8(__p0) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + __builtin_neon_vld4q_dup_v(&__ret, __p0, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -52116,17 +53703,36 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) __builtin_neon_vld4_dup_v(&__ret, __p0, 10); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld4_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 12); \ + __ret; \ +}) +#else +#define vld4_dup_mf8(__p0) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + __builtin_neon_vld4_dup_v(&__ret, __p0, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vld4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x4_t __ret; \ poly64x1x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 6); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vld4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __ret; \ poly8x16x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 36); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 36); \ __ret; \ }) #else @@ -52134,16 +53740,16 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly8x16x4_t __ret; \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 36); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -52152,7 +53758,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld4q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x4_t __ret; \ poly64x2x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 38); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 38); \ __ret; \ }) #else @@ -52160,16 +53766,16 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) poly64x2x4_t __ret; \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + 
__builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 38); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -52178,7 +53784,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x4_t __ret; \ uint8x16x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 48); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 48); \ __ret; \ }) #else @@ -52186,16 +53792,16 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint8x16x4_t __ret; \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 48); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = 
__builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -52204,7 +53810,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld4q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x4_t __ret; \ uint64x2x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 51); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 51); \ __ret; \ }) #else @@ -52212,16 +53818,16 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) uint64x2x4_t __ret; \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 51); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -52230,7 +53836,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x4_t __ret; \ int8x16x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 32); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, 
__s1.val[3]), __p2, 32); \ __ret; \ }) #else @@ -52238,16 +53844,16 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int8x16x4_t __ret; \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 32); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -52256,7 +53862,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __ret; \ float64x2x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 42); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 42); \ __ret; \ }) #else @@ -52264,16 +53870,16 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) float64x2x4_t __ret; \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, 
(int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 42); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ __ret; \ }) #endif @@ -52282,7 +53888,7 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __ret; \ int64x2x4_t __s1 = __p1; \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 35); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 35); \ __ret; \ }) #else @@ -52290,16 +53896,42 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) int64x2x4_t __ret; \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vld4q_lane_v(&__ret, __p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 35); \ \ - __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], 1, 0); \ - __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], 1, 0); \ - __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], 1, 
0); \ - __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], 1, 0); \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_64); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_64); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_64); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_64); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vld4q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + mfloat8x16x4_t __s1 = __p1; \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 44); \ + __ret; \ +}) +#else +#define vld4q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x4_t __ret; \ + mfloat8x16x4_t __s1 = __p1; \ + mfloat8x16x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vld4q_lane_v(&__ret, __p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 44); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_128_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_128_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_128_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_128_8); \ __ret; \ }) #endif @@ -52307,59 +53939,85 @@ __ai __attribute__((target("neon"))) float64x1_t vget_low_f64(float64x2_t __p0) #define vld4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x4_t __ret; \ uint64x1x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 19); \ __ret; \ }) #define vld4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x4_t __ret; \ float64x1x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 10); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 10); \ __ret; \ }) #define vld4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x4_t __ret; \ int64x1x4_t __s1 = __p1; \ - __builtin_neon_vld4_lane_v(&__ret, __p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 3); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 
__builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 3); \ __ret; \ }) +#ifdef __LITTLE_ENDIAN__ +#define vld4_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + mfloat8x8x4_t __s1 = __p1; \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 12); \ + __ret; \ +}) +#else +#define vld4_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x4_t __ret; \ + mfloat8x8x4_t __s1 = __p1; \ + mfloat8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vld4_lane_v(&__ret, __p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 12); \ + \ + __ret.val[0] = __builtin_shufflevector(__ret.val[0], __ret.val[0], __lane_reverse_64_8); \ + __ret.val[1] = __builtin_shufflevector(__ret.val[1], __ret.val[1], __lane_reverse_64_8); \ + __ret.val[2] = __builtin_shufflevector(__ret.val[2], __ret.val[2], __lane_reverse_64_8); \ + __ret.val[3] = __builtin_shufflevector(__ret.val[3], __ret.val[3], __lane_reverse_64_8); \ + __ret; \ +}) +#endif + #define vldrq_p128(__p0) __extension__ ({ \ poly128_t __ret; \ - __ret = (poly128_t) __builtin_neon_vldrq_p128(__p0); \ + __ret = __builtin_bit_cast(poly128_t, __builtin_neon_vldrq_p128(__p0)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vmax_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vmax_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vmaxnmvq_f64(float64x2_t __p0) { 
float64_t __ret; - __ret = (float64_t) __builtin_neon_vmaxnmvq_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vmaxnmvq_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vmaxnmvq_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vmaxnmvq_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vmaxnmvq_f64(__rev0)); return __ret; } #endif @@ -52367,14 +54025,14 @@ __ai __attribute__((target("neon"))) float64_t vmaxnmvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vmaxnmvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxnmvq_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxnmvq_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vmaxnmvq_f32(float32x4_t __p0) { float32_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32_t) __builtin_neon_vmaxnmvq_f32(__rev0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxnmvq_f32(__rev0)); return __ret; } #endif @@ -52382,14 +54040,14 @@ __ai __attribute__((target("neon"))) float32_t vmaxnmvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vmaxnmv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxnmv_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxnmv_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vmaxnmv_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vmaxnmv_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxnmv_f32(__rev0)); return __ret; } #endif @@ -52397,14 +54055,14 @@ __ai __attribute__((target("neon"))) float32_t vmaxnmv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8_t vmaxvq_u8(uint8x16_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vmaxvq_u8(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vmaxvq_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8_t vmaxvq_u8(uint8x16_t __p0) { uint8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8_t) __builtin_neon_vmaxvq_u8(__rev0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vmaxvq_u8(__rev0)); return __ret; } #endif @@ -52412,14 +54070,14 @@ __ai __attribute__((target("neon"))) uint8_t vmaxvq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vmaxvq_u32(uint32x4_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vmaxvq_u32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vmaxvq_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vmaxvq_u32(uint32x4_t __p0) { uint32_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - 
__ret = (uint32_t) __builtin_neon_vmaxvq_u32(__rev0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vmaxvq_u32(__rev0)); return __ret; } #endif @@ -52427,14 +54085,14 @@ __ai __attribute__((target("neon"))) uint32_t vmaxvq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t vmaxvq_u16(uint16x8_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vmaxvq_u16(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vmaxvq_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vmaxvq_u16(uint16x8_t __p0) { uint16_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vmaxvq_u16(__rev0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vmaxvq_u16(__rev0)); return __ret; } #endif @@ -52442,14 +54100,14 @@ __ai __attribute__((target("neon"))) uint16_t vmaxvq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8_t vmaxvq_s8(int8x16_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vmaxvq_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vmaxvq_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int8_t vmaxvq_s8(int8x16_t __p0) { int8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8_t) __builtin_neon_vmaxvq_s8(__rev0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vmaxvq_s8(__rev0)); return __ret; } #endif @@ -52457,14 +54115,14 @@ __ai __attribute__((target("neon"))) int8_t vmaxvq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vmaxvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vmaxvq_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vmaxvq_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vmaxvq_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vmaxvq_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vmaxvq_f64(__rev0)); return __ret; } #endif @@ -52472,14 +54130,14 @@ __ai __attribute__((target("neon"))) float64_t vmaxvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vmaxvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxvq_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxvq_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vmaxvq_f32(float32x4_t __p0) { float32_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32_t) __builtin_neon_vmaxvq_f32(__rev0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxvq_f32(__rev0)); return __ret; } #endif @@ -52487,14 +54145,14 @@ __ai __attribute__((target("neon"))) float32_t vmaxvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai 
__attribute__((target("neon"))) int32_t vmaxvq_s32(int32x4_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vmaxvq_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vmaxvq_s32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int32_t vmaxvq_s32(int32x4_t __p0) { int32_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32_t) __builtin_neon_vmaxvq_s32(__rev0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vmaxvq_s32(__rev0)); return __ret; } #endif @@ -52502,14 +54160,14 @@ __ai __attribute__((target("neon"))) int32_t vmaxvq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vmaxvq_s16(int16x8_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vmaxvq_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vmaxvq_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vmaxvq_s16(int16x8_t __p0) { int16_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vmaxvq_s16(__rev0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vmaxvq_s16(__rev0)); return __ret; } #endif @@ -52517,14 +54175,14 @@ __ai __attribute__((target("neon"))) int16_t vmaxvq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8_t vmaxv_u8(uint8x8_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vmaxv_u8(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vmaxv_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8_t vmaxv_u8(uint8x8_t __p0) { uint8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8_t) __builtin_neon_vmaxv_u8(__rev0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vmaxv_u8(__rev0)); return __ret; } #endif @@ -52532,14 +54190,14 @@ __ai __attribute__((target("neon"))) uint8_t vmaxv_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vmaxv_u32(uint32x2_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vmaxv_u32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vmaxv_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vmaxv_u32(uint32x2_t __p0) { uint32_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32_t) __builtin_neon_vmaxv_u32(__rev0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vmaxv_u32(__rev0)); return __ret; } #endif @@ -52547,14 +54205,14 @@ __ai __attribute__((target("neon"))) uint32_t vmaxv_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t vmaxv_u16(uint16x4_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vmaxv_u16(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vmaxv_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vmaxv_u16(uint16x4_t __p0) { uint16_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vmaxv_u16(__rev0); + uint16x4_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vmaxv_u16(__rev0)); return __ret; } #endif @@ -52562,14 +54220,14 @@ __ai __attribute__((target("neon"))) uint16_t vmaxv_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8_t vmaxv_s8(int8x8_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vmaxv_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vmaxv_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int8_t vmaxv_s8(int8x8_t __p0) { int8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8_t) __builtin_neon_vmaxv_s8(__rev0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vmaxv_s8(__rev0)); return __ret; } #endif @@ -52577,14 +54235,14 @@ __ai __attribute__((target("neon"))) int8_t vmaxv_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vmaxv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmaxv_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxv_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vmaxv_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vmaxv_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmaxv_f32(__rev0)); return __ret; } #endif @@ -52592,14 +54250,14 @@ __ai __attribute__((target("neon"))) float32_t vmaxv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32_t vmaxv_s32(int32x2_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vmaxv_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vmaxv_s32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int32_t vmaxv_s32(int32x2_t __p0) { int32_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32_t) __builtin_neon_vmaxv_s32(__rev0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vmaxv_s32(__rev0)); return __ret; } #endif @@ -52607,14 +54265,14 @@ __ai __attribute__((target("neon"))) int32_t vmaxv_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vmaxv_s16(int16x4_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vmaxv_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vmaxv_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vmaxv_s16(int16x4_t __p0) { int16_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vmaxv_s16(__rev0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vmaxv_s16(__rev0)); return __ret; } #endif @@ -52622,36 +54280,36 @@ __ai __attribute__((target("neon"))) int16_t vmaxv_s16(int16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + 
__ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vmin_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vmin_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vminnmvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vminnmvq_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vminnmvq_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vminnmvq_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vminnmvq_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vminnmvq_f64(__rev0)); return __ret; } #endif @@ -52659,14 +54317,14 @@ __ai __attribute__((target("neon"))) float64_t vminnmvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vminnmvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminnmvq_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminnmvq_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vminnmvq_f32(float32x4_t __p0) { float32_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32_t) __builtin_neon_vminnmvq_f32(__rev0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminnmvq_f32(__rev0)); return __ret; } #endif @@ -52674,14 +54332,14 @@ __ai __attribute__((target("neon"))) float32_t vminnmvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vminnmv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminnmv_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminnmv_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vminnmv_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vminnmv_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminnmv_f32(__rev0)); return __ret; } #endif @@ -52689,14 +54347,14 @@ __ai __attribute__((target("neon"))) float32_t vminnmv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8_t vminvq_u8(uint8x16_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminvq_u8(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vminvq_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8_t vminvq_u8(uint8x16_t __p0) { uint8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8_t) __builtin_neon_vminvq_u8(__rev0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vminvq_u8(__rev0)); return __ret; } #endif @@ -52704,14 +54362,14 @@ __ai __attribute__((target("neon"))) uint8_t vminvq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vminvq_u32(uint32x4_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vminvq_u32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vminvq_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vminvq_u32(uint32x4_t __p0) { uint32_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint32_t) __builtin_neon_vminvq_u32(__rev0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vminvq_u32(__rev0)); return __ret; } #endif @@ -52719,14 +54377,14 @@ __ai __attribute__((target("neon"))) uint32_t vminvq_u32(uint32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t vminvq_u16(uint16x8_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminvq_u16(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vminvq_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vminvq_u16(uint16x8_t __p0) { uint16_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vminvq_u16(__rev0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vminvq_u16(__rev0)); return __ret; } #endif @@ -52734,14 +54392,14 @@ __ai __attribute__((target("neon"))) uint16_t vminvq_u16(uint16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8_t vminvq_s8(int8x16_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vminvq_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vminvq_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int8_t vminvq_s8(int8x16_t __p0) { int8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8_t) __builtin_neon_vminvq_s8(__rev0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vminvq_s8(__rev0)); return __ret; } #endif @@ -52749,14 +54407,14 @@ __ai __attribute__((target("neon"))) int8_t vminvq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vminvq_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) 
__builtin_neon_vminvq_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vminvq_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vminvq_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vminvq_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vminvq_f64(__rev0)); return __ret; } #endif @@ -52764,14 +54422,14 @@ __ai __attribute__((target("neon"))) float64_t vminvq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vminvq_f32(float32x4_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminvq_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminvq_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vminvq_f32(float32x4_t __p0) { float32_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32_t) __builtin_neon_vminvq_f32(__rev0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminvq_f32(__rev0)); return __ret; } #endif @@ -52779,14 +54437,14 @@ __ai __attribute__((target("neon"))) float32_t vminvq_f32(float32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32_t vminvq_s32(int32x4_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vminvq_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vminvq_s32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int32_t vminvq_s32(int32x4_t __p0) { int32_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int32_t) __builtin_neon_vminvq_s32(__rev0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vminvq_s32(__rev0)); return __ret; } #endif @@ -52794,14 +54452,14 @@ __ai __attribute__((target("neon"))) int32_t vminvq_s32(int32x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vminvq_s16(int16x8_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vminvq_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vminvq_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vminvq_s16(int16x8_t __p0) { int16_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vminvq_s16(__rev0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vminvq_s16(__rev0)); return __ret; } #endif @@ -52809,14 +54467,14 @@ __ai __attribute__((target("neon"))) int16_t vminvq_s16(int16x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8_t vminv_u8(uint8x8_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vminv_u8(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vminv_u8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint8_t vminv_u8(uint8x8_t __p0) { uint8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8_t) __builtin_neon_vminv_u8(__rev0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vminv_u8(__rev0)); return __ret; } #endif @@ -52824,14 +54482,14 @@ __ai __attribute__((target("neon"))) uint8_t vminv_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32_t vminv_u32(uint32x2_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vminv_u32(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vminv_u32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint32_t vminv_u32(uint32x2_t __p0) { uint32_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint32_t) __builtin_neon_vminv_u32(__rev0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vminv_u32(__rev0)); return __ret; } #endif @@ -52839,14 +54497,14 @@ __ai __attribute__((target("neon"))) uint32_t vminv_u32(uint32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16_t vminv_u16(uint16x4_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vminv_u16(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vminv_u16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint16_t vminv_u16(uint16x4_t __p0) { uint16_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (uint16_t) __builtin_neon_vminv_u16(__rev0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vminv_u16(__rev0)); return __ret; } #endif @@ -52854,14 +54512,14 @@ __ai __attribute__((target("neon"))) uint16_t vminv_u16(uint16x4_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8_t vminv_s8(int8x8_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vminv_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vminv_s8(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int8_t vminv_s8(int8x8_t __p0) { int8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8_t) __builtin_neon_vminv_s8(__rev0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vminv_s8(__rev0)); return __ret; } #endif @@ -52869,14 +54527,14 @@ __ai __attribute__((target("neon"))) int8_t vminv_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vminv_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vminv_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminv_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vminv_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vminv_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vminv_f32(__rev0)); return __ret; } #endif @@ -52884,14 +54542,14 @@ __ai __attribute__((target("neon"))) float32_t vminv_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32_t vminv_s32(int32x2_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vminv_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vminv_s32(__p0)); return __ret; } #else 
__ai __attribute__((target("neon"))) int32_t vminv_s32(int32x2_t __p0) { int32_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int32_t) __builtin_neon_vminv_s32(__rev0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vminv_s32(__rev0)); return __ret; } #endif @@ -52899,14 +54557,14 @@ __ai __attribute__((target("neon"))) int32_t vminv_s32(int32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16_t vminv_s16(int16x4_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vminv_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vminv_s16(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int16_t vminv_s16(int16x4_t __p0) { int16_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (int16_t) __builtin_neon_vminv_s16(__rev0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vminv_s16(__rev0)); return __ret; } #endif @@ -52920,11 +54578,11 @@ __ai __attribute__((target("neon"))) float64x2_t vmlaq_f64(float64x2_t __p0, flo #else __ai __attribute__((target("neon"))) float64x2_t vmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __rev0 + __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -52935,533 +54593,533 @@ __ai __attribute__((target("neon"))) float64x1_t vmla_f64(float64x1_t __p0, floa return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_u32(__p0_448, __p1_448, __p2_448, __p3_448) __extension__ ({ \ - uint32x4_t __ret_448; \ - uint32x4_t __s0_448 = __p0_448; \ - uint32x4_t __s1_448 = __p1_448; \ - uint32x4_t __s2_448 = __p2_448; \ - __ret_448 = __s0_448 + __s1_448 * splatq_laneq_u32(__s2_448, __p3_448); \ - __ret_448; \ -}) -#else -#define vmlaq_laneq_u32(__p0_449, __p1_449, __p2_449, __p3_449) __extension__ ({ \ - uint32x4_t __ret_449; \ - uint32x4_t __s0_449 = __p0_449; \ - uint32x4_t __s1_449 = __p1_449; \ - uint32x4_t __s2_449 = __p2_449; \ - uint32x4_t __rev0_449; __rev0_449 = __builtin_shufflevector(__s0_449, __s0_449, 3, 2, 1, 0); \ - uint32x4_t __rev1_449; __rev1_449 = __builtin_shufflevector(__s1_449, __s1_449, 3, 2, 1, 0); \ - uint32x4_t __rev2_449; __rev2_449 = __builtin_shufflevector(__s2_449, __s2_449, 3, 2, 1, 0); \ - __ret_449 = __rev0_449 + __rev1_449 * __noswap_splatq_laneq_u32(__rev2_449, __p3_449); \ - __ret_449 = __builtin_shufflevector(__ret_449, __ret_449, 3, 2, 1, 0); \ - __ret_449; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_u16(__p0_450, __p1_450, __p2_450, __p3_450) __extension__ ({ \ - uint16x8_t __ret_450; \ - uint16x8_t __s0_450 = __p0_450; \ - uint16x8_t __s1_450 = __p1_450; \ - uint16x8_t __s2_450 = __p2_450; \ - __ret_450 = __s0_450 + __s1_450 * 
splatq_laneq_u16(__s2_450, __p3_450); \ - __ret_450; \ -}) -#else -#define vmlaq_laneq_u16(__p0_451, __p1_451, __p2_451, __p3_451) __extension__ ({ \ - uint16x8_t __ret_451; \ - uint16x8_t __s0_451 = __p0_451; \ - uint16x8_t __s1_451 = __p1_451; \ - uint16x8_t __s2_451 = __p2_451; \ - uint16x8_t __rev0_451; __rev0_451 = __builtin_shufflevector(__s0_451, __s0_451, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_451; __rev1_451 = __builtin_shufflevector(__s1_451, __s1_451, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_451; __rev2_451 = __builtin_shufflevector(__s2_451, __s2_451, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_451 = __rev0_451 + __rev1_451 * __noswap_splatq_laneq_u16(__rev2_451, __p3_451); \ - __ret_451 = __builtin_shufflevector(__ret_451, __ret_451, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_451; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_f32(__p0_452, __p1_452, __p2_452, __p3_452) __extension__ ({ \ - float32x4_t __ret_452; \ - float32x4_t __s0_452 = __p0_452; \ - float32x4_t __s1_452 = __p1_452; \ - float32x4_t __s2_452 = __p2_452; \ - __ret_452 = __s0_452 + __s1_452 * splatq_laneq_f32(__s2_452, __p3_452); \ - __ret_452; \ -}) -#else -#define vmlaq_laneq_f32(__p0_453, __p1_453, __p2_453, __p3_453) __extension__ ({ \ - float32x4_t __ret_453; \ - float32x4_t __s0_453 = __p0_453; \ - float32x4_t __s1_453 = __p1_453; \ - float32x4_t __s2_453 = __p2_453; \ - float32x4_t __rev0_453; __rev0_453 = __builtin_shufflevector(__s0_453, __s0_453, 3, 2, 1, 0); \ - float32x4_t __rev1_453; __rev1_453 = __builtin_shufflevector(__s1_453, __s1_453, 3, 2, 1, 0); \ - float32x4_t __rev2_453; __rev2_453 = __builtin_shufflevector(__s2_453, __s2_453, 3, 2, 1, 0); \ - __ret_453 = __rev0_453 + __rev1_453 * __noswap_splatq_laneq_f32(__rev2_453, __p3_453); \ - __ret_453 = __builtin_shufflevector(__ret_453, __ret_453, 3, 2, 1, 0); \ - __ret_453; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_s32(__p0_454, __p1_454, __p2_454, __p3_454) __extension__ ({ \ - int32x4_t __ret_454; \ - int32x4_t __s0_454 = __p0_454; \ - int32x4_t __s1_454 = __p1_454; \ - int32x4_t __s2_454 = __p2_454; \ - __ret_454 = __s0_454 + __s1_454 * splatq_laneq_s32(__s2_454, __p3_454); \ - __ret_454; \ -}) -#else -#define vmlaq_laneq_s32(__p0_455, __p1_455, __p2_455, __p3_455) __extension__ ({ \ - int32x4_t __ret_455; \ - int32x4_t __s0_455 = __p0_455; \ - int32x4_t __s1_455 = __p1_455; \ - int32x4_t __s2_455 = __p2_455; \ - int32x4_t __rev0_455; __rev0_455 = __builtin_shufflevector(__s0_455, __s0_455, 3, 2, 1, 0); \ - int32x4_t __rev1_455; __rev1_455 = __builtin_shufflevector(__s1_455, __s1_455, 3, 2, 1, 0); \ - int32x4_t __rev2_455; __rev2_455 = __builtin_shufflevector(__s2_455, __s2_455, 3, 2, 1, 0); \ - __ret_455 = __rev0_455 + __rev1_455 * __noswap_splatq_laneq_s32(__rev2_455, __p3_455); \ - __ret_455 = __builtin_shufflevector(__ret_455, __ret_455, 3, 2, 1, 0); \ - __ret_455; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_s16(__p0_456, __p1_456, __p2_456, __p3_456) __extension__ ({ \ - int16x8_t __ret_456; \ - int16x8_t __s0_456 = __p0_456; \ - int16x8_t __s1_456 = __p1_456; \ - int16x8_t __s2_456 = __p2_456; \ - __ret_456 = __s0_456 + __s1_456 * splatq_laneq_s16(__s2_456, __p3_456); \ - __ret_456; \ -}) -#else -#define vmlaq_laneq_s16(__p0_457, __p1_457, __p2_457, __p3_457) __extension__ ({ \ - int16x8_t __ret_457; \ - int16x8_t __s0_457 = __p0_457; \ - int16x8_t __s1_457 = __p1_457; \ - int16x8_t __s2_457 = __p2_457; \ - int16x8_t __rev0_457; __rev0_457 = __builtin_shufflevector(__s0_457, __s0_457, 
7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_457; __rev1_457 = __builtin_shufflevector(__s1_457, __s1_457, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_457; __rev2_457 = __builtin_shufflevector(__s2_457, __s2_457, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_457 = __rev0_457 + __rev1_457 * __noswap_splatq_laneq_s16(__rev2_457, __p3_457); \ - __ret_457 = __builtin_shufflevector(__ret_457, __ret_457, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_457; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_u32(__p0_458, __p1_458, __p2_458, __p3_458) __extension__ ({ \ - uint32x2_t __ret_458; \ - uint32x2_t __s0_458 = __p0_458; \ - uint32x2_t __s1_458 = __p1_458; \ - uint32x4_t __s2_458 = __p2_458; \ - __ret_458 = __s0_458 + __s1_458 * splat_laneq_u32(__s2_458, __p3_458); \ - __ret_458; \ -}) -#else -#define vmla_laneq_u32(__p0_459, __p1_459, __p2_459, __p3_459) __extension__ ({ \ - uint32x2_t __ret_459; \ - uint32x2_t __s0_459 = __p0_459; \ - uint32x2_t __s1_459 = __p1_459; \ - uint32x4_t __s2_459 = __p2_459; \ - uint32x2_t __rev0_459; __rev0_459 = __builtin_shufflevector(__s0_459, __s0_459, 1, 0); \ - uint32x2_t __rev1_459; __rev1_459 = __builtin_shufflevector(__s1_459, __s1_459, 1, 0); \ - uint32x4_t __rev2_459; __rev2_459 = __builtin_shufflevector(__s2_459, __s2_459, 3, 2, 1, 0); \ - __ret_459 = __rev0_459 + __rev1_459 * __noswap_splat_laneq_u32(__rev2_459, __p3_459); \ - __ret_459 = __builtin_shufflevector(__ret_459, __ret_459, 1, 0); \ - __ret_459; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_u16(__p0_460, __p1_460, __p2_460, __p3_460) __extension__ ({ \ - uint16x4_t __ret_460; \ - uint16x4_t __s0_460 = __p0_460; \ - uint16x4_t __s1_460 = __p1_460; \ - uint16x8_t __s2_460 = __p2_460; \ - __ret_460 = __s0_460 + __s1_460 * splat_laneq_u16(__s2_460, __p3_460); \ - __ret_460; \ -}) -#else -#define vmla_laneq_u16(__p0_461, __p1_461, __p2_461, __p3_461) __extension__ ({ \ - uint16x4_t __ret_461; \ - uint16x4_t __s0_461 = __p0_461; \ - uint16x4_t __s1_461 = __p1_461; \ - uint16x8_t __s2_461 = __p2_461; \ - uint16x4_t __rev0_461; __rev0_461 = __builtin_shufflevector(__s0_461, __s0_461, 3, 2, 1, 0); \ - uint16x4_t __rev1_461; __rev1_461 = __builtin_shufflevector(__s1_461, __s1_461, 3, 2, 1, 0); \ - uint16x8_t __rev2_461; __rev2_461 = __builtin_shufflevector(__s2_461, __s2_461, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_461 = __rev0_461 + __rev1_461 * __noswap_splat_laneq_u16(__rev2_461, __p3_461); \ - __ret_461 = __builtin_shufflevector(__ret_461, __ret_461, 3, 2, 1, 0); \ - __ret_461; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_f32(__p0_462, __p1_462, __p2_462, __p3_462) __extension__ ({ \ - float32x2_t __ret_462; \ - float32x2_t __s0_462 = __p0_462; \ - float32x2_t __s1_462 = __p1_462; \ - float32x4_t __s2_462 = __p2_462; \ - __ret_462 = __s0_462 + __s1_462 * splat_laneq_f32(__s2_462, __p3_462); \ - __ret_462; \ -}) -#else -#define vmla_laneq_f32(__p0_463, __p1_463, __p2_463, __p3_463) __extension__ ({ \ - float32x2_t __ret_463; \ - float32x2_t __s0_463 = __p0_463; \ - float32x2_t __s1_463 = __p1_463; \ - float32x4_t __s2_463 = __p2_463; \ - float32x2_t __rev0_463; __rev0_463 = __builtin_shufflevector(__s0_463, __s0_463, 1, 0); \ - float32x2_t __rev1_463; __rev1_463 = __builtin_shufflevector(__s1_463, __s1_463, 1, 0); \ - float32x4_t __rev2_463; __rev2_463 = __builtin_shufflevector(__s2_463, __s2_463, 3, 2, 1, 0); \ - __ret_463 = __rev0_463 + __rev1_463 * __noswap_splat_laneq_f32(__rev2_463, __p3_463); \ - __ret_463 = __builtin_shufflevector(__ret_463, __ret_463, 1, 0); \ - 
__ret_463; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_s32(__p0_464, __p1_464, __p2_464, __p3_464) __extension__ ({ \ - int32x2_t __ret_464; \ - int32x2_t __s0_464 = __p0_464; \ - int32x2_t __s1_464 = __p1_464; \ - int32x4_t __s2_464 = __p2_464; \ - __ret_464 = __s0_464 + __s1_464 * splat_laneq_s32(__s2_464, __p3_464); \ +#define vmlaq_laneq_u32(__p0_464, __p1_464, __p2_464, __p3_464) __extension__ ({ \ + uint32x4_t __ret_464; \ + uint32x4_t __s0_464 = __p0_464; \ + uint32x4_t __s1_464 = __p1_464; \ + uint32x4_t __s2_464 = __p2_464; \ + __ret_464 = __s0_464 + __s1_464 * splatq_laneq_u32(__s2_464, __p3_464); \ __ret_464; \ }) #else -#define vmla_laneq_s32(__p0_465, __p1_465, __p2_465, __p3_465) __extension__ ({ \ - int32x2_t __ret_465; \ - int32x2_t __s0_465 = __p0_465; \ - int32x2_t __s1_465 = __p1_465; \ - int32x4_t __s2_465 = __p2_465; \ - int32x2_t __rev0_465; __rev0_465 = __builtin_shufflevector(__s0_465, __s0_465, 1, 0); \ - int32x2_t __rev1_465; __rev1_465 = __builtin_shufflevector(__s1_465, __s1_465, 1, 0); \ - int32x4_t __rev2_465; __rev2_465 = __builtin_shufflevector(__s2_465, __s2_465, 3, 2, 1, 0); \ - __ret_465 = __rev0_465 + __rev1_465 * __noswap_splat_laneq_s32(__rev2_465, __p3_465); \ - __ret_465 = __builtin_shufflevector(__ret_465, __ret_465, 1, 0); \ +#define vmlaq_laneq_u32(__p0_465, __p1_465, __p2_465, __p3_465) __extension__ ({ \ + uint32x4_t __ret_465; \ + uint32x4_t __s0_465 = __p0_465; \ + uint32x4_t __s1_465 = __p1_465; \ + uint32x4_t __s2_465 = __p2_465; \ + uint32x4_t __rev0_465; __rev0_465 = __builtin_shufflevector(__s0_465, __s0_465, __lane_reverse_128_32); \ + uint32x4_t __rev1_465; __rev1_465 = __builtin_shufflevector(__s1_465, __s1_465, __lane_reverse_128_32); \ + uint32x4_t __rev2_465; __rev2_465 = __builtin_shufflevector(__s2_465, __s2_465, __lane_reverse_128_32); \ + __ret_465 = __rev0_465 + __rev1_465 * __noswap_splatq_laneq_u32(__rev2_465, __p3_465); \ + __ret_465 = __builtin_shufflevector(__ret_465, __ret_465, __lane_reverse_128_32); \ __ret_465; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_s16(__p0_466, __p1_466, __p2_466, __p3_466) __extension__ ({ \ - int16x4_t __ret_466; \ - int16x4_t __s0_466 = __p0_466; \ - int16x4_t __s1_466 = __p1_466; \ - int16x8_t __s2_466 = __p2_466; \ - __ret_466 = __s0_466 + __s1_466 * splat_laneq_s16(__s2_466, __p3_466); \ +#define vmlaq_laneq_u16(__p0_466, __p1_466, __p2_466, __p3_466) __extension__ ({ \ + uint16x8_t __ret_466; \ + uint16x8_t __s0_466 = __p0_466; \ + uint16x8_t __s1_466 = __p1_466; \ + uint16x8_t __s2_466 = __p2_466; \ + __ret_466 = __s0_466 + __s1_466 * splatq_laneq_u16(__s2_466, __p3_466); \ __ret_466; \ }) #else -#define vmla_laneq_s16(__p0_467, __p1_467, __p2_467, __p3_467) __extension__ ({ \ - int16x4_t __ret_467; \ - int16x4_t __s0_467 = __p0_467; \ - int16x4_t __s1_467 = __p1_467; \ - int16x8_t __s2_467 = __p2_467; \ - int16x4_t __rev0_467; __rev0_467 = __builtin_shufflevector(__s0_467, __s0_467, 3, 2, 1, 0); \ - int16x4_t __rev1_467; __rev1_467 = __builtin_shufflevector(__s1_467, __s1_467, 3, 2, 1, 0); \ - int16x8_t __rev2_467; __rev2_467 = __builtin_shufflevector(__s2_467, __s2_467, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_467 = __rev0_467 + __rev1_467 * __noswap_splat_laneq_s16(__rev2_467, __p3_467); \ - __ret_467 = __builtin_shufflevector(__ret_467, __ret_467, 3, 2, 1, 0); \ +#define vmlaq_laneq_u16(__p0_467, __p1_467, __p2_467, __p3_467) __extension__ ({ \ + uint16x8_t __ret_467; \ + uint16x8_t __s0_467 = __p0_467; \ + uint16x8_t __s1_467 = __p1_467; \ + uint16x8_t 
__s2_467 = __p2_467; \ + uint16x8_t __rev0_467; __rev0_467 = __builtin_shufflevector(__s0_467, __s0_467, __lane_reverse_128_16); \ + uint16x8_t __rev1_467; __rev1_467 = __builtin_shufflevector(__s1_467, __s1_467, __lane_reverse_128_16); \ + uint16x8_t __rev2_467; __rev2_467 = __builtin_shufflevector(__s2_467, __s2_467, __lane_reverse_128_16); \ + __ret_467 = __rev0_467 + __rev1_467 * __noswap_splatq_laneq_u16(__rev2_467, __p3_467); \ + __ret_467 = __builtin_shufflevector(__ret_467, __ret_467, __lane_reverse_128_16); \ __ret_467; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_u32(__p0_468, __p1_468, __p2_468, __p3_468) __extension__ ({ \ - uint64x2_t __ret_468; \ - uint64x2_t __s0_468 = __p0_468; \ - uint32x4_t __s1_468 = __p1_468; \ - uint32x2_t __s2_468 = __p2_468; \ - __ret_468 = __s0_468 + vmull_u32(vget_high_u32(__s1_468), splat_lane_u32(__s2_468, __p3_468)); \ +#define vmlaq_laneq_f32(__p0_468, __p1_468, __p2_468, __p3_468) __extension__ ({ \ + float32x4_t __ret_468; \ + float32x4_t __s0_468 = __p0_468; \ + float32x4_t __s1_468 = __p1_468; \ + float32x4_t __s2_468 = __p2_468; \ + __ret_468 = __s0_468 + __s1_468 * splatq_laneq_f32(__s2_468, __p3_468); \ __ret_468; \ }) #else -#define vmlal_high_lane_u32(__p0_469, __p1_469, __p2_469, __p3_469) __extension__ ({ \ - uint64x2_t __ret_469; \ - uint64x2_t __s0_469 = __p0_469; \ - uint32x4_t __s1_469 = __p1_469; \ - uint32x2_t __s2_469 = __p2_469; \ - uint64x2_t __rev0_469; __rev0_469 = __builtin_shufflevector(__s0_469, __s0_469, 1, 0); \ - uint32x4_t __rev1_469; __rev1_469 = __builtin_shufflevector(__s1_469, __s1_469, 3, 2, 1, 0); \ - uint32x2_t __rev2_469; __rev2_469 = __builtin_shufflevector(__s2_469, __s2_469, 1, 0); \ - __ret_469 = __rev0_469 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_469), __noswap_splat_lane_u32(__rev2_469, __p3_469)); \ - __ret_469 = __builtin_shufflevector(__ret_469, __ret_469, 1, 0); \ +#define vmlaq_laneq_f32(__p0_469, __p1_469, __p2_469, __p3_469) __extension__ ({ \ + float32x4_t __ret_469; \ + float32x4_t __s0_469 = __p0_469; \ + float32x4_t __s1_469 = __p1_469; \ + float32x4_t __s2_469 = __p2_469; \ + float32x4_t __rev0_469; __rev0_469 = __builtin_shufflevector(__s0_469, __s0_469, __lane_reverse_128_32); \ + float32x4_t __rev1_469; __rev1_469 = __builtin_shufflevector(__s1_469, __s1_469, __lane_reverse_128_32); \ + float32x4_t __rev2_469; __rev2_469 = __builtin_shufflevector(__s2_469, __s2_469, __lane_reverse_128_32); \ + __ret_469 = __rev0_469 + __rev1_469 * __noswap_splatq_laneq_f32(__rev2_469, __p3_469); \ + __ret_469 = __builtin_shufflevector(__ret_469, __ret_469, __lane_reverse_128_32); \ __ret_469; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_u16(__p0_470, __p1_470, __p2_470, __p3_470) __extension__ ({ \ - uint32x4_t __ret_470; \ - uint32x4_t __s0_470 = __p0_470; \ - uint16x8_t __s1_470 = __p1_470; \ - uint16x4_t __s2_470 = __p2_470; \ - __ret_470 = __s0_470 + vmull_u16(vget_high_u16(__s1_470), splat_lane_u16(__s2_470, __p3_470)); \ +#define vmlaq_laneq_s32(__p0_470, __p1_470, __p2_470, __p3_470) __extension__ ({ \ + int32x4_t __ret_470; \ + int32x4_t __s0_470 = __p0_470; \ + int32x4_t __s1_470 = __p1_470; \ + int32x4_t __s2_470 = __p2_470; \ + __ret_470 = __s0_470 + __s1_470 * splatq_laneq_s32(__s2_470, __p3_470); \ __ret_470; \ }) #else -#define vmlal_high_lane_u16(__p0_471, __p1_471, __p2_471, __p3_471) __extension__ ({ \ - uint32x4_t __ret_471; \ - uint32x4_t __s0_471 = __p0_471; \ - uint16x8_t __s1_471 = __p1_471; \ - uint16x4_t __s2_471 = __p2_471; \ - 
uint32x4_t __rev0_471; __rev0_471 = __builtin_shufflevector(__s0_471, __s0_471, 3, 2, 1, 0); \ - uint16x8_t __rev1_471; __rev1_471 = __builtin_shufflevector(__s1_471, __s1_471, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_471; __rev2_471 = __builtin_shufflevector(__s2_471, __s2_471, 3, 2, 1, 0); \ - __ret_471 = __rev0_471 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_471), __noswap_splat_lane_u16(__rev2_471, __p3_471)); \ - __ret_471 = __builtin_shufflevector(__ret_471, __ret_471, 3, 2, 1, 0); \ +#define vmlaq_laneq_s32(__p0_471, __p1_471, __p2_471, __p3_471) __extension__ ({ \ + int32x4_t __ret_471; \ + int32x4_t __s0_471 = __p0_471; \ + int32x4_t __s1_471 = __p1_471; \ + int32x4_t __s2_471 = __p2_471; \ + int32x4_t __rev0_471; __rev0_471 = __builtin_shufflevector(__s0_471, __s0_471, __lane_reverse_128_32); \ + int32x4_t __rev1_471; __rev1_471 = __builtin_shufflevector(__s1_471, __s1_471, __lane_reverse_128_32); \ + int32x4_t __rev2_471; __rev2_471 = __builtin_shufflevector(__s2_471, __s2_471, __lane_reverse_128_32); \ + __ret_471 = __rev0_471 + __rev1_471 * __noswap_splatq_laneq_s32(__rev2_471, __p3_471); \ + __ret_471 = __builtin_shufflevector(__ret_471, __ret_471, __lane_reverse_128_32); \ __ret_471; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_s32(__p0_472, __p1_472, __p2_472, __p3_472) __extension__ ({ \ - int64x2_t __ret_472; \ - int64x2_t __s0_472 = __p0_472; \ - int32x4_t __s1_472 = __p1_472; \ - int32x2_t __s2_472 = __p2_472; \ - __ret_472 = __s0_472 + vmull_s32(vget_high_s32(__s1_472), splat_lane_s32(__s2_472, __p3_472)); \ +#define vmlaq_laneq_s16(__p0_472, __p1_472, __p2_472, __p3_472) __extension__ ({ \ + int16x8_t __ret_472; \ + int16x8_t __s0_472 = __p0_472; \ + int16x8_t __s1_472 = __p1_472; \ + int16x8_t __s2_472 = __p2_472; \ + __ret_472 = __s0_472 + __s1_472 * splatq_laneq_s16(__s2_472, __p3_472); \ __ret_472; \ }) #else -#define vmlal_high_lane_s32(__p0_473, __p1_473, __p2_473, __p3_473) __extension__ ({ \ - int64x2_t __ret_473; \ - int64x2_t __s0_473 = __p0_473; \ - int32x4_t __s1_473 = __p1_473; \ - int32x2_t __s2_473 = __p2_473; \ - int64x2_t __rev0_473; __rev0_473 = __builtin_shufflevector(__s0_473, __s0_473, 1, 0); \ - int32x4_t __rev1_473; __rev1_473 = __builtin_shufflevector(__s1_473, __s1_473, 3, 2, 1, 0); \ - int32x2_t __rev2_473; __rev2_473 = __builtin_shufflevector(__s2_473, __s2_473, 1, 0); \ - __ret_473 = __rev0_473 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_473), __noswap_splat_lane_s32(__rev2_473, __p3_473)); \ - __ret_473 = __builtin_shufflevector(__ret_473, __ret_473, 1, 0); \ +#define vmlaq_laneq_s16(__p0_473, __p1_473, __p2_473, __p3_473) __extension__ ({ \ + int16x8_t __ret_473; \ + int16x8_t __s0_473 = __p0_473; \ + int16x8_t __s1_473 = __p1_473; \ + int16x8_t __s2_473 = __p2_473; \ + int16x8_t __rev0_473; __rev0_473 = __builtin_shufflevector(__s0_473, __s0_473, __lane_reverse_128_16); \ + int16x8_t __rev1_473; __rev1_473 = __builtin_shufflevector(__s1_473, __s1_473, __lane_reverse_128_16); \ + int16x8_t __rev2_473; __rev2_473 = __builtin_shufflevector(__s2_473, __s2_473, __lane_reverse_128_16); \ + __ret_473 = __rev0_473 + __rev1_473 * __noswap_splatq_laneq_s16(__rev2_473, __p3_473); \ + __ret_473 = __builtin_shufflevector(__ret_473, __ret_473, __lane_reverse_128_16); \ __ret_473; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_s16(__p0_474, __p1_474, __p2_474, __p3_474) __extension__ ({ \ - int32x4_t __ret_474; \ - int32x4_t __s0_474 = __p0_474; \ - int16x8_t __s1_474 = __p1_474; \ - int16x4_t 
__s2_474 = __p2_474; \ - __ret_474 = __s0_474 + vmull_s16(vget_high_s16(__s1_474), splat_lane_s16(__s2_474, __p3_474)); \ +#define vmla_laneq_u32(__p0_474, __p1_474, __p2_474, __p3_474) __extension__ ({ \ + uint32x2_t __ret_474; \ + uint32x2_t __s0_474 = __p0_474; \ + uint32x2_t __s1_474 = __p1_474; \ + uint32x4_t __s2_474 = __p2_474; \ + __ret_474 = __s0_474 + __s1_474 * splat_laneq_u32(__s2_474, __p3_474); \ __ret_474; \ }) #else -#define vmlal_high_lane_s16(__p0_475, __p1_475, __p2_475, __p3_475) __extension__ ({ \ - int32x4_t __ret_475; \ - int32x4_t __s0_475 = __p0_475; \ - int16x8_t __s1_475 = __p1_475; \ - int16x4_t __s2_475 = __p2_475; \ - int32x4_t __rev0_475; __rev0_475 = __builtin_shufflevector(__s0_475, __s0_475, 3, 2, 1, 0); \ - int16x8_t __rev1_475; __rev1_475 = __builtin_shufflevector(__s1_475, __s1_475, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_475; __rev2_475 = __builtin_shufflevector(__s2_475, __s2_475, 3, 2, 1, 0); \ - __ret_475 = __rev0_475 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_475), __noswap_splat_lane_s16(__rev2_475, __p3_475)); \ - __ret_475 = __builtin_shufflevector(__ret_475, __ret_475, 3, 2, 1, 0); \ +#define vmla_laneq_u32(__p0_475, __p1_475, __p2_475, __p3_475) __extension__ ({ \ + uint32x2_t __ret_475; \ + uint32x2_t __s0_475 = __p0_475; \ + uint32x2_t __s1_475 = __p1_475; \ + uint32x4_t __s2_475 = __p2_475; \ + uint32x2_t __rev0_475; __rev0_475 = __builtin_shufflevector(__s0_475, __s0_475, __lane_reverse_64_32); \ + uint32x2_t __rev1_475; __rev1_475 = __builtin_shufflevector(__s1_475, __s1_475, __lane_reverse_64_32); \ + uint32x4_t __rev2_475; __rev2_475 = __builtin_shufflevector(__s2_475, __s2_475, __lane_reverse_128_32); \ + __ret_475 = __rev0_475 + __rev1_475 * __noswap_splat_laneq_u32(__rev2_475, __p3_475); \ + __ret_475 = __builtin_shufflevector(__ret_475, __ret_475, __lane_reverse_64_32); \ __ret_475; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_u32(__p0_476, __p1_476, __p2_476, __p3_476) __extension__ ({ \ - uint64x2_t __ret_476; \ - uint64x2_t __s0_476 = __p0_476; \ - uint32x4_t __s1_476 = __p1_476; \ - uint32x4_t __s2_476 = __p2_476; \ - __ret_476 = __s0_476 + vmull_u32(vget_high_u32(__s1_476), splat_laneq_u32(__s2_476, __p3_476)); \ +#define vmla_laneq_u16(__p0_476, __p1_476, __p2_476, __p3_476) __extension__ ({ \ + uint16x4_t __ret_476; \ + uint16x4_t __s0_476 = __p0_476; \ + uint16x4_t __s1_476 = __p1_476; \ + uint16x8_t __s2_476 = __p2_476; \ + __ret_476 = __s0_476 + __s1_476 * splat_laneq_u16(__s2_476, __p3_476); \ __ret_476; \ }) #else -#define vmlal_high_laneq_u32(__p0_477, __p1_477, __p2_477, __p3_477) __extension__ ({ \ - uint64x2_t __ret_477; \ - uint64x2_t __s0_477 = __p0_477; \ - uint32x4_t __s1_477 = __p1_477; \ - uint32x4_t __s2_477 = __p2_477; \ - uint64x2_t __rev0_477; __rev0_477 = __builtin_shufflevector(__s0_477, __s0_477, 1, 0); \ - uint32x4_t __rev1_477; __rev1_477 = __builtin_shufflevector(__s1_477, __s1_477, 3, 2, 1, 0); \ - uint32x4_t __rev2_477; __rev2_477 = __builtin_shufflevector(__s2_477, __s2_477, 3, 2, 1, 0); \ - __ret_477 = __rev0_477 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_477), __noswap_splat_laneq_u32(__rev2_477, __p3_477)); \ - __ret_477 = __builtin_shufflevector(__ret_477, __ret_477, 1, 0); \ +#define vmla_laneq_u16(__p0_477, __p1_477, __p2_477, __p3_477) __extension__ ({ \ + uint16x4_t __ret_477; \ + uint16x4_t __s0_477 = __p0_477; \ + uint16x4_t __s1_477 = __p1_477; \ + uint16x8_t __s2_477 = __p2_477; \ + uint16x4_t __rev0_477; __rev0_477 = 
__builtin_shufflevector(__s0_477, __s0_477, __lane_reverse_64_16); \ + uint16x4_t __rev1_477; __rev1_477 = __builtin_shufflevector(__s1_477, __s1_477, __lane_reverse_64_16); \ + uint16x8_t __rev2_477; __rev2_477 = __builtin_shufflevector(__s2_477, __s2_477, __lane_reverse_128_16); \ + __ret_477 = __rev0_477 + __rev1_477 * __noswap_splat_laneq_u16(__rev2_477, __p3_477); \ + __ret_477 = __builtin_shufflevector(__ret_477, __ret_477, __lane_reverse_64_16); \ __ret_477; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_u16(__p0_478, __p1_478, __p2_478, __p3_478) __extension__ ({ \ - uint32x4_t __ret_478; \ - uint32x4_t __s0_478 = __p0_478; \ - uint16x8_t __s1_478 = __p1_478; \ - uint16x8_t __s2_478 = __p2_478; \ - __ret_478 = __s0_478 + vmull_u16(vget_high_u16(__s1_478), splat_laneq_u16(__s2_478, __p3_478)); \ +#define vmla_laneq_f32(__p0_478, __p1_478, __p2_478, __p3_478) __extension__ ({ \ + float32x2_t __ret_478; \ + float32x2_t __s0_478 = __p0_478; \ + float32x2_t __s1_478 = __p1_478; \ + float32x4_t __s2_478 = __p2_478; \ + __ret_478 = __s0_478 + __s1_478 * splat_laneq_f32(__s2_478, __p3_478); \ __ret_478; \ }) #else -#define vmlal_high_laneq_u16(__p0_479, __p1_479, __p2_479, __p3_479) __extension__ ({ \ - uint32x4_t __ret_479; \ - uint32x4_t __s0_479 = __p0_479; \ - uint16x8_t __s1_479 = __p1_479; \ - uint16x8_t __s2_479 = __p2_479; \ - uint32x4_t __rev0_479; __rev0_479 = __builtin_shufflevector(__s0_479, __s0_479, 3, 2, 1, 0); \ - uint16x8_t __rev1_479; __rev1_479 = __builtin_shufflevector(__s1_479, __s1_479, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_479; __rev2_479 = __builtin_shufflevector(__s2_479, __s2_479, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_479 = __rev0_479 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_479), __noswap_splat_laneq_u16(__rev2_479, __p3_479)); \ - __ret_479 = __builtin_shufflevector(__ret_479, __ret_479, 3, 2, 1, 0); \ +#define vmla_laneq_f32(__p0_479, __p1_479, __p2_479, __p3_479) __extension__ ({ \ + float32x2_t __ret_479; \ + float32x2_t __s0_479 = __p0_479; \ + float32x2_t __s1_479 = __p1_479; \ + float32x4_t __s2_479 = __p2_479; \ + float32x2_t __rev0_479; __rev0_479 = __builtin_shufflevector(__s0_479, __s0_479, __lane_reverse_64_32); \ + float32x2_t __rev1_479; __rev1_479 = __builtin_shufflevector(__s1_479, __s1_479, __lane_reverse_64_32); \ + float32x4_t __rev2_479; __rev2_479 = __builtin_shufflevector(__s2_479, __s2_479, __lane_reverse_128_32); \ + __ret_479 = __rev0_479 + __rev1_479 * __noswap_splat_laneq_f32(__rev2_479, __p3_479); \ + __ret_479 = __builtin_shufflevector(__ret_479, __ret_479, __lane_reverse_64_32); \ __ret_479; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_s32(__p0_480, __p1_480, __p2_480, __p3_480) __extension__ ({ \ - int64x2_t __ret_480; \ - int64x2_t __s0_480 = __p0_480; \ - int32x4_t __s1_480 = __p1_480; \ +#define vmla_laneq_s32(__p0_480, __p1_480, __p2_480, __p3_480) __extension__ ({ \ + int32x2_t __ret_480; \ + int32x2_t __s0_480 = __p0_480; \ + int32x2_t __s1_480 = __p1_480; \ int32x4_t __s2_480 = __p2_480; \ - __ret_480 = __s0_480 + vmull_s32(vget_high_s32(__s1_480), splat_laneq_s32(__s2_480, __p3_480)); \ + __ret_480 = __s0_480 + __s1_480 * splat_laneq_s32(__s2_480, __p3_480); \ __ret_480; \ }) #else -#define vmlal_high_laneq_s32(__p0_481, __p1_481, __p2_481, __p3_481) __extension__ ({ \ - int64x2_t __ret_481; \ - int64x2_t __s0_481 = __p0_481; \ - int32x4_t __s1_481 = __p1_481; \ +#define vmla_laneq_s32(__p0_481, __p1_481, __p2_481, __p3_481) __extension__ ({ \ + int32x2_t __ret_481; \ + 
int32x2_t __s0_481 = __p0_481; \ + int32x2_t __s1_481 = __p1_481; \ int32x4_t __s2_481 = __p2_481; \ - int64x2_t __rev0_481; __rev0_481 = __builtin_shufflevector(__s0_481, __s0_481, 1, 0); \ - int32x4_t __rev1_481; __rev1_481 = __builtin_shufflevector(__s1_481, __s1_481, 3, 2, 1, 0); \ - int32x4_t __rev2_481; __rev2_481 = __builtin_shufflevector(__s2_481, __s2_481, 3, 2, 1, 0); \ - __ret_481 = __rev0_481 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_481), __noswap_splat_laneq_s32(__rev2_481, __p3_481)); \ - __ret_481 = __builtin_shufflevector(__ret_481, __ret_481, 1, 0); \ + int32x2_t __rev0_481; __rev0_481 = __builtin_shufflevector(__s0_481, __s0_481, __lane_reverse_64_32); \ + int32x2_t __rev1_481; __rev1_481 = __builtin_shufflevector(__s1_481, __s1_481, __lane_reverse_64_32); \ + int32x4_t __rev2_481; __rev2_481 = __builtin_shufflevector(__s2_481, __s2_481, __lane_reverse_128_32); \ + __ret_481 = __rev0_481 + __rev1_481 * __noswap_splat_laneq_s32(__rev2_481, __p3_481); \ + __ret_481 = __builtin_shufflevector(__ret_481, __ret_481, __lane_reverse_64_32); \ __ret_481; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_s16(__p0_482, __p1_482, __p2_482, __p3_482) __extension__ ({ \ - int32x4_t __ret_482; \ - int32x4_t __s0_482 = __p0_482; \ - int16x8_t __s1_482 = __p1_482; \ +#define vmla_laneq_s16(__p0_482, __p1_482, __p2_482, __p3_482) __extension__ ({ \ + int16x4_t __ret_482; \ + int16x4_t __s0_482 = __p0_482; \ + int16x4_t __s1_482 = __p1_482; \ int16x8_t __s2_482 = __p2_482; \ - __ret_482 = __s0_482 + vmull_s16(vget_high_s16(__s1_482), splat_laneq_s16(__s2_482, __p3_482)); \ + __ret_482 = __s0_482 + __s1_482 * splat_laneq_s16(__s2_482, __p3_482); \ __ret_482; \ }) #else -#define vmlal_high_laneq_s16(__p0_483, __p1_483, __p2_483, __p3_483) __extension__ ({ \ - int32x4_t __ret_483; \ - int32x4_t __s0_483 = __p0_483; \ - int16x8_t __s1_483 = __p1_483; \ +#define vmla_laneq_s16(__p0_483, __p1_483, __p2_483, __p3_483) __extension__ ({ \ + int16x4_t __ret_483; \ + int16x4_t __s0_483 = __p0_483; \ + int16x4_t __s1_483 = __p1_483; \ int16x8_t __s2_483 = __p2_483; \ - int32x4_t __rev0_483; __rev0_483 = __builtin_shufflevector(__s0_483, __s0_483, 3, 2, 1, 0); \ - int16x8_t __rev1_483; __rev1_483 = __builtin_shufflevector(__s1_483, __s1_483, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_483; __rev2_483 = __builtin_shufflevector(__s2_483, __s2_483, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_483 = __rev0_483 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_483), __noswap_splat_laneq_s16(__rev2_483, __p3_483)); \ - __ret_483 = __builtin_shufflevector(__ret_483, __ret_483, 3, 2, 1, 0); \ + int16x4_t __rev0_483; __rev0_483 = __builtin_shufflevector(__s0_483, __s0_483, __lane_reverse_64_16); \ + int16x4_t __rev1_483; __rev1_483 = __builtin_shufflevector(__s1_483, __s1_483, __lane_reverse_64_16); \ + int16x8_t __rev2_483; __rev2_483 = __builtin_shufflevector(__s2_483, __s2_483, __lane_reverse_128_16); \ + __ret_483 = __rev0_483 + __rev1_483 * __noswap_splat_laneq_s16(__rev2_483, __p3_483); \ + __ret_483 = __builtin_shufflevector(__ret_483, __ret_483, __lane_reverse_64_16); \ __ret_483; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_u32(__p0_484, __p1_484, __p2_484, __p3_484) __extension__ ({ \ +#define vmlal_high_lane_u32(__p0_484, __p1_484, __p2_484, __p3_484) __extension__ ({ \ uint64x2_t __ret_484; \ uint64x2_t __s0_484 = __p0_484; \ - uint32x2_t __s1_484 = __p1_484; \ - uint32x4_t __s2_484 = __p2_484; \ - __ret_484 = __s0_484 + vmull_u32(__s1_484, splat_laneq_u32(__s2_484, 
__p3_484)); \ + uint32x4_t __s1_484 = __p1_484; \ + uint32x2_t __s2_484 = __p2_484; \ + __ret_484 = __s0_484 + vmull_u32(vget_high_u32(__s1_484), splat_lane_u32(__s2_484, __p3_484)); \ __ret_484; \ }) #else -#define vmlal_laneq_u32(__p0_485, __p1_485, __p2_485, __p3_485) __extension__ ({ \ +#define vmlal_high_lane_u32(__p0_485, __p1_485, __p2_485, __p3_485) __extension__ ({ \ uint64x2_t __ret_485; \ uint64x2_t __s0_485 = __p0_485; \ - uint32x2_t __s1_485 = __p1_485; \ - uint32x4_t __s2_485 = __p2_485; \ - uint64x2_t __rev0_485; __rev0_485 = __builtin_shufflevector(__s0_485, __s0_485, 1, 0); \ - uint32x2_t __rev1_485; __rev1_485 = __builtin_shufflevector(__s1_485, __s1_485, 1, 0); \ - uint32x4_t __rev2_485; __rev2_485 = __builtin_shufflevector(__s2_485, __s2_485, 3, 2, 1, 0); \ - __ret_485 = __rev0_485 + __noswap_vmull_u32(__rev1_485, __noswap_splat_laneq_u32(__rev2_485, __p3_485)); \ - __ret_485 = __builtin_shufflevector(__ret_485, __ret_485, 1, 0); \ + uint32x4_t __s1_485 = __p1_485; \ + uint32x2_t __s2_485 = __p2_485; \ + uint64x2_t __rev0_485; __rev0_485 = __builtin_shufflevector(__s0_485, __s0_485, __lane_reverse_128_64); \ + uint32x4_t __rev1_485; __rev1_485 = __builtin_shufflevector(__s1_485, __s1_485, __lane_reverse_128_32); \ + uint32x2_t __rev2_485; __rev2_485 = __builtin_shufflevector(__s2_485, __s2_485, __lane_reverse_64_32); \ + __ret_485 = __rev0_485 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_485), __noswap_splat_lane_u32(__rev2_485, __p3_485)); \ + __ret_485 = __builtin_shufflevector(__ret_485, __ret_485, __lane_reverse_128_64); \ __ret_485; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_u16(__p0_486, __p1_486, __p2_486, __p3_486) __extension__ ({ \ +#define vmlal_high_lane_u16(__p0_486, __p1_486, __p2_486, __p3_486) __extension__ ({ \ uint32x4_t __ret_486; \ uint32x4_t __s0_486 = __p0_486; \ - uint16x4_t __s1_486 = __p1_486; \ - uint16x8_t __s2_486 = __p2_486; \ - __ret_486 = __s0_486 + vmull_u16(__s1_486, splat_laneq_u16(__s2_486, __p3_486)); \ + uint16x8_t __s1_486 = __p1_486; \ + uint16x4_t __s2_486 = __p2_486; \ + __ret_486 = __s0_486 + vmull_u16(vget_high_u16(__s1_486), splat_lane_u16(__s2_486, __p3_486)); \ __ret_486; \ }) #else -#define vmlal_laneq_u16(__p0_487, __p1_487, __p2_487, __p3_487) __extension__ ({ \ +#define vmlal_high_lane_u16(__p0_487, __p1_487, __p2_487, __p3_487) __extension__ ({ \ uint32x4_t __ret_487; \ uint32x4_t __s0_487 = __p0_487; \ - uint16x4_t __s1_487 = __p1_487; \ - uint16x8_t __s2_487 = __p2_487; \ - uint32x4_t __rev0_487; __rev0_487 = __builtin_shufflevector(__s0_487, __s0_487, 3, 2, 1, 0); \ - uint16x4_t __rev1_487; __rev1_487 = __builtin_shufflevector(__s1_487, __s1_487, 3, 2, 1, 0); \ - uint16x8_t __rev2_487; __rev2_487 = __builtin_shufflevector(__s2_487, __s2_487, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_487 = __rev0_487 + __noswap_vmull_u16(__rev1_487, __noswap_splat_laneq_u16(__rev2_487, __p3_487)); \ - __ret_487 = __builtin_shufflevector(__ret_487, __ret_487, 3, 2, 1, 0); \ + uint16x8_t __s1_487 = __p1_487; \ + uint16x4_t __s2_487 = __p2_487; \ + uint32x4_t __rev0_487; __rev0_487 = __builtin_shufflevector(__s0_487, __s0_487, __lane_reverse_128_32); \ + uint16x8_t __rev1_487; __rev1_487 = __builtin_shufflevector(__s1_487, __s1_487, __lane_reverse_128_16); \ + uint16x4_t __rev2_487; __rev2_487 = __builtin_shufflevector(__s2_487, __s2_487, __lane_reverse_64_16); \ + __ret_487 = __rev0_487 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_487), __noswap_splat_lane_u16(__rev2_487, __p3_487)); \ + __ret_487 = 
__builtin_shufflevector(__ret_487, __ret_487, __lane_reverse_128_32); \ __ret_487; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_s32(__p0_488, __p1_488, __p2_488, __p3_488) __extension__ ({ \ +#define vmlal_high_lane_s32(__p0_488, __p1_488, __p2_488, __p3_488) __extension__ ({ \ int64x2_t __ret_488; \ int64x2_t __s0_488 = __p0_488; \ - int32x2_t __s1_488 = __p1_488; \ - int32x4_t __s2_488 = __p2_488; \ - __ret_488 = __s0_488 + vmull_s32(__s1_488, splat_laneq_s32(__s2_488, __p3_488)); \ + int32x4_t __s1_488 = __p1_488; \ + int32x2_t __s2_488 = __p2_488; \ + __ret_488 = __s0_488 + vmull_s32(vget_high_s32(__s1_488), splat_lane_s32(__s2_488, __p3_488)); \ __ret_488; \ }) #else -#define vmlal_laneq_s32(__p0_489, __p1_489, __p2_489, __p3_489) __extension__ ({ \ +#define vmlal_high_lane_s32(__p0_489, __p1_489, __p2_489, __p3_489) __extension__ ({ \ int64x2_t __ret_489; \ int64x2_t __s0_489 = __p0_489; \ - int32x2_t __s1_489 = __p1_489; \ - int32x4_t __s2_489 = __p2_489; \ - int64x2_t __rev0_489; __rev0_489 = __builtin_shufflevector(__s0_489, __s0_489, 1, 0); \ - int32x2_t __rev1_489; __rev1_489 = __builtin_shufflevector(__s1_489, __s1_489, 1, 0); \ - int32x4_t __rev2_489; __rev2_489 = __builtin_shufflevector(__s2_489, __s2_489, 3, 2, 1, 0); \ - __ret_489 = __rev0_489 + __noswap_vmull_s32(__rev1_489, __noswap_splat_laneq_s32(__rev2_489, __p3_489)); \ - __ret_489 = __builtin_shufflevector(__ret_489, __ret_489, 1, 0); \ + int32x4_t __s1_489 = __p1_489; \ + int32x2_t __s2_489 = __p2_489; \ + int64x2_t __rev0_489; __rev0_489 = __builtin_shufflevector(__s0_489, __s0_489, __lane_reverse_128_64); \ + int32x4_t __rev1_489; __rev1_489 = __builtin_shufflevector(__s1_489, __s1_489, __lane_reverse_128_32); \ + int32x2_t __rev2_489; __rev2_489 = __builtin_shufflevector(__s2_489, __s2_489, __lane_reverse_64_32); \ + __ret_489 = __rev0_489 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_489), __noswap_splat_lane_s32(__rev2_489, __p3_489)); \ + __ret_489 = __builtin_shufflevector(__ret_489, __ret_489, __lane_reverse_128_64); \ __ret_489; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_s16(__p0_490, __p1_490, __p2_490, __p3_490) __extension__ ({ \ +#define vmlal_high_lane_s16(__p0_490, __p1_490, __p2_490, __p3_490) __extension__ ({ \ int32x4_t __ret_490; \ int32x4_t __s0_490 = __p0_490; \ - int16x4_t __s1_490 = __p1_490; \ - int16x8_t __s2_490 = __p2_490; \ - __ret_490 = __s0_490 + vmull_s16(__s1_490, splat_laneq_s16(__s2_490, __p3_490)); \ + int16x8_t __s1_490 = __p1_490; \ + int16x4_t __s2_490 = __p2_490; \ + __ret_490 = __s0_490 + vmull_s16(vget_high_s16(__s1_490), splat_lane_s16(__s2_490, __p3_490)); \ __ret_490; \ }) #else -#define vmlal_laneq_s16(__p0_491, __p1_491, __p2_491, __p3_491) __extension__ ({ \ +#define vmlal_high_lane_s16(__p0_491, __p1_491, __p2_491, __p3_491) __extension__ ({ \ int32x4_t __ret_491; \ int32x4_t __s0_491 = __p0_491; \ - int16x4_t __s1_491 = __p1_491; \ - int16x8_t __s2_491 = __p2_491; \ - int32x4_t __rev0_491; __rev0_491 = __builtin_shufflevector(__s0_491, __s0_491, 3, 2, 1, 0); \ - int16x4_t __rev1_491; __rev1_491 = __builtin_shufflevector(__s1_491, __s1_491, 3, 2, 1, 0); \ - int16x8_t __rev2_491; __rev2_491 = __builtin_shufflevector(__s2_491, __s2_491, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_491 = __rev0_491 + __noswap_vmull_s16(__rev1_491, __noswap_splat_laneq_s16(__rev2_491, __p3_491)); \ - __ret_491 = __builtin_shufflevector(__ret_491, __ret_491, 3, 2, 1, 0); \ + int16x8_t __s1_491 = __p1_491; \ + int16x4_t __s2_491 = __p2_491; \ + int32x4_t __rev0_491; 
__rev0_491 = __builtin_shufflevector(__s0_491, __s0_491, __lane_reverse_128_32); \ + int16x8_t __rev1_491; __rev1_491 = __builtin_shufflevector(__s1_491, __s1_491, __lane_reverse_128_16); \ + int16x4_t __rev2_491; __rev2_491 = __builtin_shufflevector(__s2_491, __s2_491, __lane_reverse_64_16); \ + __ret_491 = __rev0_491 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_491), __noswap_splat_lane_s16(__rev2_491, __p3_491)); \ + __ret_491 = __builtin_shufflevector(__ret_491, __ret_491, __lane_reverse_128_32); \ __ret_491; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vmlal_high_laneq_u32(__p0_492, __p1_492, __p2_492, __p3_492) __extension__ ({ \ + uint64x2_t __ret_492; \ + uint64x2_t __s0_492 = __p0_492; \ + uint32x4_t __s1_492 = __p1_492; \ + uint32x4_t __s2_492 = __p2_492; \ + __ret_492 = __s0_492 + vmull_u32(vget_high_u32(__s1_492), splat_laneq_u32(__s2_492, __p3_492)); \ + __ret_492; \ +}) +#else +#define vmlal_high_laneq_u32(__p0_493, __p1_493, __p2_493, __p3_493) __extension__ ({ \ + uint64x2_t __ret_493; \ + uint64x2_t __s0_493 = __p0_493; \ + uint32x4_t __s1_493 = __p1_493; \ + uint32x4_t __s2_493 = __p2_493; \ + uint64x2_t __rev0_493; __rev0_493 = __builtin_shufflevector(__s0_493, __s0_493, __lane_reverse_128_64); \ + uint32x4_t __rev1_493; __rev1_493 = __builtin_shufflevector(__s1_493, __s1_493, __lane_reverse_128_32); \ + uint32x4_t __rev2_493; __rev2_493 = __builtin_shufflevector(__s2_493, __s2_493, __lane_reverse_128_32); \ + __ret_493 = __rev0_493 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_493), __noswap_splat_laneq_u32(__rev2_493, __p3_493)); \ + __ret_493 = __builtin_shufflevector(__ret_493, __ret_493, __lane_reverse_128_64); \ + __ret_493; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_high_laneq_u16(__p0_494, __p1_494, __p2_494, __p3_494) __extension__ ({ \ + uint32x4_t __ret_494; \ + uint32x4_t __s0_494 = __p0_494; \ + uint16x8_t __s1_494 = __p1_494; \ + uint16x8_t __s2_494 = __p2_494; \ + __ret_494 = __s0_494 + vmull_u16(vget_high_u16(__s1_494), splat_laneq_u16(__s2_494, __p3_494)); \ + __ret_494; \ +}) +#else +#define vmlal_high_laneq_u16(__p0_495, __p1_495, __p2_495, __p3_495) __extension__ ({ \ + uint32x4_t __ret_495; \ + uint32x4_t __s0_495 = __p0_495; \ + uint16x8_t __s1_495 = __p1_495; \ + uint16x8_t __s2_495 = __p2_495; \ + uint32x4_t __rev0_495; __rev0_495 = __builtin_shufflevector(__s0_495, __s0_495, __lane_reverse_128_32); \ + uint16x8_t __rev1_495; __rev1_495 = __builtin_shufflevector(__s1_495, __s1_495, __lane_reverse_128_16); \ + uint16x8_t __rev2_495; __rev2_495 = __builtin_shufflevector(__s2_495, __s2_495, __lane_reverse_128_16); \ + __ret_495 = __rev0_495 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_495), __noswap_splat_laneq_u16(__rev2_495, __p3_495)); \ + __ret_495 = __builtin_shufflevector(__ret_495, __ret_495, __lane_reverse_128_32); \ + __ret_495; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_high_laneq_s32(__p0_496, __p1_496, __p2_496, __p3_496) __extension__ ({ \ + int64x2_t __ret_496; \ + int64x2_t __s0_496 = __p0_496; \ + int32x4_t __s1_496 = __p1_496; \ + int32x4_t __s2_496 = __p2_496; \ + __ret_496 = __s0_496 + vmull_s32(vget_high_s32(__s1_496), splat_laneq_s32(__s2_496, __p3_496)); \ + __ret_496; \ +}) +#else +#define vmlal_high_laneq_s32(__p0_497, __p1_497, __p2_497, __p3_497) __extension__ ({ \ + int64x2_t __ret_497; \ + int64x2_t __s0_497 = __p0_497; \ + int32x4_t __s1_497 = __p1_497; \ + int32x4_t __s2_497 = __p2_497; \ + int64x2_t __rev0_497; __rev0_497 = __builtin_shufflevector(__s0_497, __s0_497, 
__lane_reverse_128_64); \ + int32x4_t __rev1_497; __rev1_497 = __builtin_shufflevector(__s1_497, __s1_497, __lane_reverse_128_32); \ + int32x4_t __rev2_497; __rev2_497 = __builtin_shufflevector(__s2_497, __s2_497, __lane_reverse_128_32); \ + __ret_497 = __rev0_497 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_497), __noswap_splat_laneq_s32(__rev2_497, __p3_497)); \ + __ret_497 = __builtin_shufflevector(__ret_497, __ret_497, __lane_reverse_128_64); \ + __ret_497; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_high_laneq_s16(__p0_498, __p1_498, __p2_498, __p3_498) __extension__ ({ \ + int32x4_t __ret_498; \ + int32x4_t __s0_498 = __p0_498; \ + int16x8_t __s1_498 = __p1_498; \ + int16x8_t __s2_498 = __p2_498; \ + __ret_498 = __s0_498 + vmull_s16(vget_high_s16(__s1_498), splat_laneq_s16(__s2_498, __p3_498)); \ + __ret_498; \ +}) +#else +#define vmlal_high_laneq_s16(__p0_499, __p1_499, __p2_499, __p3_499) __extension__ ({ \ + int32x4_t __ret_499; \ + int32x4_t __s0_499 = __p0_499; \ + int16x8_t __s1_499 = __p1_499; \ + int16x8_t __s2_499 = __p2_499; \ + int32x4_t __rev0_499; __rev0_499 = __builtin_shufflevector(__s0_499, __s0_499, __lane_reverse_128_32); \ + int16x8_t __rev1_499; __rev1_499 = __builtin_shufflevector(__s1_499, __s1_499, __lane_reverse_128_16); \ + int16x8_t __rev2_499; __rev2_499 = __builtin_shufflevector(__s2_499, __s2_499, __lane_reverse_128_16); \ + __ret_499 = __rev0_499 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_499), __noswap_splat_laneq_s16(__rev2_499, __p3_499)); \ + __ret_499 = __builtin_shufflevector(__ret_499, __ret_499, __lane_reverse_128_32); \ + __ret_499; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_laneq_u32(__p0_500, __p1_500, __p2_500, __p3_500) __extension__ ({ \ + uint64x2_t __ret_500; \ + uint64x2_t __s0_500 = __p0_500; \ + uint32x2_t __s1_500 = __p1_500; \ + uint32x4_t __s2_500 = __p2_500; \ + __ret_500 = __s0_500 + vmull_u32(__s1_500, splat_laneq_u32(__s2_500, __p3_500)); \ + __ret_500; \ +}) +#else +#define vmlal_laneq_u32(__p0_501, __p1_501, __p2_501, __p3_501) __extension__ ({ \ + uint64x2_t __ret_501; \ + uint64x2_t __s0_501 = __p0_501; \ + uint32x2_t __s1_501 = __p1_501; \ + uint32x4_t __s2_501 = __p2_501; \ + uint64x2_t __rev0_501; __rev0_501 = __builtin_shufflevector(__s0_501, __s0_501, __lane_reverse_128_64); \ + uint32x2_t __rev1_501; __rev1_501 = __builtin_shufflevector(__s1_501, __s1_501, __lane_reverse_64_32); \ + uint32x4_t __rev2_501; __rev2_501 = __builtin_shufflevector(__s2_501, __s2_501, __lane_reverse_128_32); \ + __ret_501 = __rev0_501 + __noswap_vmull_u32(__rev1_501, __noswap_splat_laneq_u32(__rev2_501, __p3_501)); \ + __ret_501 = __builtin_shufflevector(__ret_501, __ret_501, __lane_reverse_128_64); \ + __ret_501; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_laneq_u16(__p0_502, __p1_502, __p2_502, __p3_502) __extension__ ({ \ + uint32x4_t __ret_502; \ + uint32x4_t __s0_502 = __p0_502; \ + uint16x4_t __s1_502 = __p1_502; \ + uint16x8_t __s2_502 = __p2_502; \ + __ret_502 = __s0_502 + vmull_u16(__s1_502, splat_laneq_u16(__s2_502, __p3_502)); \ + __ret_502; \ +}) +#else +#define vmlal_laneq_u16(__p0_503, __p1_503, __p2_503, __p3_503) __extension__ ({ \ + uint32x4_t __ret_503; \ + uint32x4_t __s0_503 = __p0_503; \ + uint16x4_t __s1_503 = __p1_503; \ + uint16x8_t __s2_503 = __p2_503; \ + uint32x4_t __rev0_503; __rev0_503 = __builtin_shufflevector(__s0_503, __s0_503, __lane_reverse_128_32); \ + uint16x4_t __rev1_503; __rev1_503 = __builtin_shufflevector(__s1_503, __s1_503, __lane_reverse_64_16); \ 
+ uint16x8_t __rev2_503; __rev2_503 = __builtin_shufflevector(__s2_503, __s2_503, __lane_reverse_128_16); \ + __ret_503 = __rev0_503 + __noswap_vmull_u16(__rev1_503, __noswap_splat_laneq_u16(__rev2_503, __p3_503)); \ + __ret_503 = __builtin_shufflevector(__ret_503, __ret_503, __lane_reverse_128_32); \ + __ret_503; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_laneq_s32(__p0_504, __p1_504, __p2_504, __p3_504) __extension__ ({ \ + int64x2_t __ret_504; \ + int64x2_t __s0_504 = __p0_504; \ + int32x2_t __s1_504 = __p1_504; \ + int32x4_t __s2_504 = __p2_504; \ + __ret_504 = __s0_504 + vmull_s32(__s1_504, splat_laneq_s32(__s2_504, __p3_504)); \ + __ret_504; \ +}) +#else +#define vmlal_laneq_s32(__p0_505, __p1_505, __p2_505, __p3_505) __extension__ ({ \ + int64x2_t __ret_505; \ + int64x2_t __s0_505 = __p0_505; \ + int32x2_t __s1_505 = __p1_505; \ + int32x4_t __s2_505 = __p2_505; \ + int64x2_t __rev0_505; __rev0_505 = __builtin_shufflevector(__s0_505, __s0_505, __lane_reverse_128_64); \ + int32x2_t __rev1_505; __rev1_505 = __builtin_shufflevector(__s1_505, __s1_505, __lane_reverse_64_32); \ + int32x4_t __rev2_505; __rev2_505 = __builtin_shufflevector(__s2_505, __s2_505, __lane_reverse_128_32); \ + __ret_505 = __rev0_505 + __noswap_vmull_s32(__rev1_505, __noswap_splat_laneq_s32(__rev2_505, __p3_505)); \ + __ret_505 = __builtin_shufflevector(__ret_505, __ret_505, __lane_reverse_128_64); \ + __ret_505; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlal_laneq_s16(__p0_506, __p1_506, __p2_506, __p3_506) __extension__ ({ \ + int32x4_t __ret_506; \ + int32x4_t __s0_506 = __p0_506; \ + int16x4_t __s1_506 = __p1_506; \ + int16x8_t __s2_506 = __p2_506; \ + __ret_506 = __s0_506 + vmull_s16(__s1_506, splat_laneq_s16(__s2_506, __p3_506)); \ + __ret_506; \ +}) +#else +#define vmlal_laneq_s16(__p0_507, __p1_507, __p2_507, __p3_507) __extension__ ({ \ + int32x4_t __ret_507; \ + int32x4_t __s0_507 = __p0_507; \ + int16x4_t __s1_507 = __p1_507; \ + int16x8_t __s2_507 = __p2_507; \ + int32x4_t __rev0_507; __rev0_507 = __builtin_shufflevector(__s0_507, __s0_507, __lane_reverse_128_32); \ + int16x4_t __rev1_507; __rev1_507 = __builtin_shufflevector(__s1_507, __s1_507, __lane_reverse_64_16); \ + int16x8_t __rev2_507; __rev2_507 = __builtin_shufflevector(__s2_507, __s2_507, __lane_reverse_128_16); \ + __ret_507 = __rev0_507 + __noswap_vmull_s16(__rev1_507, __noswap_splat_laneq_s16(__rev2_507, __p3_507)); \ + __ret_507 = __builtin_shufflevector(__ret_507, __ret_507, __lane_reverse_128_32); \ + __ret_507; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; @@ -53471,11 +55129,11 @@ __ai __attribute__((target("neon"))) float64x2_t vmlsq_f64(float64x2_t __p0, flo #else __ai __attribute__((target("neon"))) float64x2_t vmlsq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __rev0 - __rev1 * __rev2; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -53486,533 +55144,533 @@ __ai __attribute__((target("neon"))) float64x1_t vmls_f64(float64x1_t __p0, floa return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_u32(__p0_492, __p1_492, __p2_492, __p3_492) __extension__ ({ \ - uint32x4_t __ret_492; \ - uint32x4_t __s0_492 = __p0_492; \ - uint32x4_t __s1_492 = __p1_492; \ - uint32x4_t __s2_492 = __p2_492; \ - __ret_492 = __s0_492 - __s1_492 * splatq_laneq_u32(__s2_492, __p3_492); \ - __ret_492; \ -}) -#else -#define vmlsq_laneq_u32(__p0_493, __p1_493, __p2_493, __p3_493) __extension__ ({ \ - uint32x4_t __ret_493; \ - uint32x4_t __s0_493 = __p0_493; \ - uint32x4_t __s1_493 = __p1_493; \ - uint32x4_t __s2_493 = __p2_493; \ - uint32x4_t __rev0_493; __rev0_493 = __builtin_shufflevector(__s0_493, __s0_493, 3, 2, 1, 0); \ - uint32x4_t __rev1_493; __rev1_493 = __builtin_shufflevector(__s1_493, __s1_493, 3, 2, 1, 0); \ - uint32x4_t __rev2_493; __rev2_493 = __builtin_shufflevector(__s2_493, __s2_493, 3, 2, 1, 0); \ - __ret_493 = __rev0_493 - __rev1_493 * __noswap_splatq_laneq_u32(__rev2_493, __p3_493); \ - __ret_493 = __builtin_shufflevector(__ret_493, __ret_493, 3, 2, 1, 0); \ - __ret_493; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_u16(__p0_494, __p1_494, __p2_494, __p3_494) __extension__ ({ \ - uint16x8_t __ret_494; \ - uint16x8_t __s0_494 = __p0_494; \ - uint16x8_t __s1_494 = __p1_494; \ - uint16x8_t __s2_494 = __p2_494; \ - __ret_494 = __s0_494 - __s1_494 * splatq_laneq_u16(__s2_494, __p3_494); \ - __ret_494; \ -}) -#else -#define vmlsq_laneq_u16(__p0_495, __p1_495, __p2_495, __p3_495) __extension__ ({ \ - uint16x8_t __ret_495; \ - uint16x8_t __s0_495 = __p0_495; \ - uint16x8_t __s1_495 = __p1_495; \ - uint16x8_t __s2_495 = __p2_495; \ - uint16x8_t __rev0_495; __rev0_495 = __builtin_shufflevector(__s0_495, __s0_495, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_495; __rev1_495 = __builtin_shufflevector(__s1_495, __s1_495, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_495; __rev2_495 = __builtin_shufflevector(__s2_495, __s2_495, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_495 = __rev0_495 - __rev1_495 * __noswap_splatq_laneq_u16(__rev2_495, __p3_495); \ - __ret_495 = __builtin_shufflevector(__ret_495, __ret_495, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_495; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_f32(__p0_496, __p1_496, __p2_496, __p3_496) __extension__ ({ \ - float32x4_t __ret_496; \ - float32x4_t __s0_496 = __p0_496; \ - float32x4_t __s1_496 = __p1_496; \ - float32x4_t __s2_496 = __p2_496; \ - __ret_496 = __s0_496 - __s1_496 * splatq_laneq_f32(__s2_496, __p3_496); \ - __ret_496; \ -}) -#else -#define vmlsq_laneq_f32(__p0_497, __p1_497, __p2_497, __p3_497) __extension__ ({ \ - float32x4_t __ret_497; \ - float32x4_t __s0_497 = __p0_497; \ - float32x4_t __s1_497 = __p1_497; \ - float32x4_t __s2_497 = __p2_497; \ - float32x4_t __rev0_497; __rev0_497 = __builtin_shufflevector(__s0_497, __s0_497, 3, 2, 1, 0); \ - float32x4_t __rev1_497; __rev1_497 = __builtin_shufflevector(__s1_497, __s1_497, 3, 2, 1, 0); \ - float32x4_t __rev2_497; __rev2_497 = __builtin_shufflevector(__s2_497, __s2_497, 3, 2, 1, 0); \ - __ret_497 = __rev0_497 - __rev1_497 * __noswap_splatq_laneq_f32(__rev2_497, __p3_497); \ - __ret_497 = __builtin_shufflevector(__ret_497, __ret_497, 3, 2, 1, 0); \ - __ret_497; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_s32(__p0_498, __p1_498, __p2_498, __p3_498) __extension__ ({ \ - int32x4_t __ret_498; \ - 
int32x4_t __s0_498 = __p0_498; \ - int32x4_t __s1_498 = __p1_498; \ - int32x4_t __s2_498 = __p2_498; \ - __ret_498 = __s0_498 - __s1_498 * splatq_laneq_s32(__s2_498, __p3_498); \ - __ret_498; \ -}) -#else -#define vmlsq_laneq_s32(__p0_499, __p1_499, __p2_499, __p3_499) __extension__ ({ \ - int32x4_t __ret_499; \ - int32x4_t __s0_499 = __p0_499; \ - int32x4_t __s1_499 = __p1_499; \ - int32x4_t __s2_499 = __p2_499; \ - int32x4_t __rev0_499; __rev0_499 = __builtin_shufflevector(__s0_499, __s0_499, 3, 2, 1, 0); \ - int32x4_t __rev1_499; __rev1_499 = __builtin_shufflevector(__s1_499, __s1_499, 3, 2, 1, 0); \ - int32x4_t __rev2_499; __rev2_499 = __builtin_shufflevector(__s2_499, __s2_499, 3, 2, 1, 0); \ - __ret_499 = __rev0_499 - __rev1_499 * __noswap_splatq_laneq_s32(__rev2_499, __p3_499); \ - __ret_499 = __builtin_shufflevector(__ret_499, __ret_499, 3, 2, 1, 0); \ - __ret_499; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_s16(__p0_500, __p1_500, __p2_500, __p3_500) __extension__ ({ \ - int16x8_t __ret_500; \ - int16x8_t __s0_500 = __p0_500; \ - int16x8_t __s1_500 = __p1_500; \ - int16x8_t __s2_500 = __p2_500; \ - __ret_500 = __s0_500 - __s1_500 * splatq_laneq_s16(__s2_500, __p3_500); \ - __ret_500; \ -}) -#else -#define vmlsq_laneq_s16(__p0_501, __p1_501, __p2_501, __p3_501) __extension__ ({ \ - int16x8_t __ret_501; \ - int16x8_t __s0_501 = __p0_501; \ - int16x8_t __s1_501 = __p1_501; \ - int16x8_t __s2_501 = __p2_501; \ - int16x8_t __rev0_501; __rev0_501 = __builtin_shufflevector(__s0_501, __s0_501, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_501; __rev1_501 = __builtin_shufflevector(__s1_501, __s1_501, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_501; __rev2_501 = __builtin_shufflevector(__s2_501, __s2_501, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_501 = __rev0_501 - __rev1_501 * __noswap_splatq_laneq_s16(__rev2_501, __p3_501); \ - __ret_501 = __builtin_shufflevector(__ret_501, __ret_501, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_501; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_u32(__p0_502, __p1_502, __p2_502, __p3_502) __extension__ ({ \ - uint32x2_t __ret_502; \ - uint32x2_t __s0_502 = __p0_502; \ - uint32x2_t __s1_502 = __p1_502; \ - uint32x4_t __s2_502 = __p2_502; \ - __ret_502 = __s0_502 - __s1_502 * splat_laneq_u32(__s2_502, __p3_502); \ - __ret_502; \ -}) -#else -#define vmls_laneq_u32(__p0_503, __p1_503, __p2_503, __p3_503) __extension__ ({ \ - uint32x2_t __ret_503; \ - uint32x2_t __s0_503 = __p0_503; \ - uint32x2_t __s1_503 = __p1_503; \ - uint32x4_t __s2_503 = __p2_503; \ - uint32x2_t __rev0_503; __rev0_503 = __builtin_shufflevector(__s0_503, __s0_503, 1, 0); \ - uint32x2_t __rev1_503; __rev1_503 = __builtin_shufflevector(__s1_503, __s1_503, 1, 0); \ - uint32x4_t __rev2_503; __rev2_503 = __builtin_shufflevector(__s2_503, __s2_503, 3, 2, 1, 0); \ - __ret_503 = __rev0_503 - __rev1_503 * __noswap_splat_laneq_u32(__rev2_503, __p3_503); \ - __ret_503 = __builtin_shufflevector(__ret_503, __ret_503, 1, 0); \ - __ret_503; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_u16(__p0_504, __p1_504, __p2_504, __p3_504) __extension__ ({ \ - uint16x4_t __ret_504; \ - uint16x4_t __s0_504 = __p0_504; \ - uint16x4_t __s1_504 = __p1_504; \ - uint16x8_t __s2_504 = __p2_504; \ - __ret_504 = __s0_504 - __s1_504 * splat_laneq_u16(__s2_504, __p3_504); \ - __ret_504; \ -}) -#else -#define vmls_laneq_u16(__p0_505, __p1_505, __p2_505, __p3_505) __extension__ ({ \ - uint16x4_t __ret_505; \ - uint16x4_t __s0_505 = __p0_505; \ - uint16x4_t __s1_505 = __p1_505; \ - uint16x8_t 
__s2_505 = __p2_505; \ - uint16x4_t __rev0_505; __rev0_505 = __builtin_shufflevector(__s0_505, __s0_505, 3, 2, 1, 0); \ - uint16x4_t __rev1_505; __rev1_505 = __builtin_shufflevector(__s1_505, __s1_505, 3, 2, 1, 0); \ - uint16x8_t __rev2_505; __rev2_505 = __builtin_shufflevector(__s2_505, __s2_505, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_505 = __rev0_505 - __rev1_505 * __noswap_splat_laneq_u16(__rev2_505, __p3_505); \ - __ret_505 = __builtin_shufflevector(__ret_505, __ret_505, 3, 2, 1, 0); \ - __ret_505; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_f32(__p0_506, __p1_506, __p2_506, __p3_506) __extension__ ({ \ - float32x2_t __ret_506; \ - float32x2_t __s0_506 = __p0_506; \ - float32x2_t __s1_506 = __p1_506; \ - float32x4_t __s2_506 = __p2_506; \ - __ret_506 = __s0_506 - __s1_506 * splat_laneq_f32(__s2_506, __p3_506); \ - __ret_506; \ -}) -#else -#define vmls_laneq_f32(__p0_507, __p1_507, __p2_507, __p3_507) __extension__ ({ \ - float32x2_t __ret_507; \ - float32x2_t __s0_507 = __p0_507; \ - float32x2_t __s1_507 = __p1_507; \ - float32x4_t __s2_507 = __p2_507; \ - float32x2_t __rev0_507; __rev0_507 = __builtin_shufflevector(__s0_507, __s0_507, 1, 0); \ - float32x2_t __rev1_507; __rev1_507 = __builtin_shufflevector(__s1_507, __s1_507, 1, 0); \ - float32x4_t __rev2_507; __rev2_507 = __builtin_shufflevector(__s2_507, __s2_507, 3, 2, 1, 0); \ - __ret_507 = __rev0_507 - __rev1_507 * __noswap_splat_laneq_f32(__rev2_507, __p3_507); \ - __ret_507 = __builtin_shufflevector(__ret_507, __ret_507, 1, 0); \ - __ret_507; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_s32(__p0_508, __p1_508, __p2_508, __p3_508) __extension__ ({ \ - int32x2_t __ret_508; \ - int32x2_t __s0_508 = __p0_508; \ - int32x2_t __s1_508 = __p1_508; \ - int32x4_t __s2_508 = __p2_508; \ - __ret_508 = __s0_508 - __s1_508 * splat_laneq_s32(__s2_508, __p3_508); \ +#define vmlsq_laneq_u32(__p0_508, __p1_508, __p2_508, __p3_508) __extension__ ({ \ + uint32x4_t __ret_508; \ + uint32x4_t __s0_508 = __p0_508; \ + uint32x4_t __s1_508 = __p1_508; \ + uint32x4_t __s2_508 = __p2_508; \ + __ret_508 = __s0_508 - __s1_508 * splatq_laneq_u32(__s2_508, __p3_508); \ __ret_508; \ }) #else -#define vmls_laneq_s32(__p0_509, __p1_509, __p2_509, __p3_509) __extension__ ({ \ - int32x2_t __ret_509; \ - int32x2_t __s0_509 = __p0_509; \ - int32x2_t __s1_509 = __p1_509; \ - int32x4_t __s2_509 = __p2_509; \ - int32x2_t __rev0_509; __rev0_509 = __builtin_shufflevector(__s0_509, __s0_509, 1, 0); \ - int32x2_t __rev1_509; __rev1_509 = __builtin_shufflevector(__s1_509, __s1_509, 1, 0); \ - int32x4_t __rev2_509; __rev2_509 = __builtin_shufflevector(__s2_509, __s2_509, 3, 2, 1, 0); \ - __ret_509 = __rev0_509 - __rev1_509 * __noswap_splat_laneq_s32(__rev2_509, __p3_509); \ - __ret_509 = __builtin_shufflevector(__ret_509, __ret_509, 1, 0); \ +#define vmlsq_laneq_u32(__p0_509, __p1_509, __p2_509, __p3_509) __extension__ ({ \ + uint32x4_t __ret_509; \ + uint32x4_t __s0_509 = __p0_509; \ + uint32x4_t __s1_509 = __p1_509; \ + uint32x4_t __s2_509 = __p2_509; \ + uint32x4_t __rev0_509; __rev0_509 = __builtin_shufflevector(__s0_509, __s0_509, __lane_reverse_128_32); \ + uint32x4_t __rev1_509; __rev1_509 = __builtin_shufflevector(__s1_509, __s1_509, __lane_reverse_128_32); \ + uint32x4_t __rev2_509; __rev2_509 = __builtin_shufflevector(__s2_509, __s2_509, __lane_reverse_128_32); \ + __ret_509 = __rev0_509 - __rev1_509 * __noswap_splatq_laneq_u32(__rev2_509, __p3_509); \ + __ret_509 = __builtin_shufflevector(__ret_509, __ret_509, __lane_reverse_128_32); 
\ __ret_509; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_s16(__p0_510, __p1_510, __p2_510, __p3_510) __extension__ ({ \ - int16x4_t __ret_510; \ - int16x4_t __s0_510 = __p0_510; \ - int16x4_t __s1_510 = __p1_510; \ - int16x8_t __s2_510 = __p2_510; \ - __ret_510 = __s0_510 - __s1_510 * splat_laneq_s16(__s2_510, __p3_510); \ +#define vmlsq_laneq_u16(__p0_510, __p1_510, __p2_510, __p3_510) __extension__ ({ \ + uint16x8_t __ret_510; \ + uint16x8_t __s0_510 = __p0_510; \ + uint16x8_t __s1_510 = __p1_510; \ + uint16x8_t __s2_510 = __p2_510; \ + __ret_510 = __s0_510 - __s1_510 * splatq_laneq_u16(__s2_510, __p3_510); \ __ret_510; \ }) #else -#define vmls_laneq_s16(__p0_511, __p1_511, __p2_511, __p3_511) __extension__ ({ \ - int16x4_t __ret_511; \ - int16x4_t __s0_511 = __p0_511; \ - int16x4_t __s1_511 = __p1_511; \ - int16x8_t __s2_511 = __p2_511; \ - int16x4_t __rev0_511; __rev0_511 = __builtin_shufflevector(__s0_511, __s0_511, 3, 2, 1, 0); \ - int16x4_t __rev1_511; __rev1_511 = __builtin_shufflevector(__s1_511, __s1_511, 3, 2, 1, 0); \ - int16x8_t __rev2_511; __rev2_511 = __builtin_shufflevector(__s2_511, __s2_511, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_511 = __rev0_511 - __rev1_511 * __noswap_splat_laneq_s16(__rev2_511, __p3_511); \ - __ret_511 = __builtin_shufflevector(__ret_511, __ret_511, 3, 2, 1, 0); \ +#define vmlsq_laneq_u16(__p0_511, __p1_511, __p2_511, __p3_511) __extension__ ({ \ + uint16x8_t __ret_511; \ + uint16x8_t __s0_511 = __p0_511; \ + uint16x8_t __s1_511 = __p1_511; \ + uint16x8_t __s2_511 = __p2_511; \ + uint16x8_t __rev0_511; __rev0_511 = __builtin_shufflevector(__s0_511, __s0_511, __lane_reverse_128_16); \ + uint16x8_t __rev1_511; __rev1_511 = __builtin_shufflevector(__s1_511, __s1_511, __lane_reverse_128_16); \ + uint16x8_t __rev2_511; __rev2_511 = __builtin_shufflevector(__s2_511, __s2_511, __lane_reverse_128_16); \ + __ret_511 = __rev0_511 - __rev1_511 * __noswap_splatq_laneq_u16(__rev2_511, __p3_511); \ + __ret_511 = __builtin_shufflevector(__ret_511, __ret_511, __lane_reverse_128_16); \ __ret_511; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_u32(__p0_512, __p1_512, __p2_512, __p3_512) __extension__ ({ \ - uint64x2_t __ret_512; \ - uint64x2_t __s0_512 = __p0_512; \ - uint32x4_t __s1_512 = __p1_512; \ - uint32x2_t __s2_512 = __p2_512; \ - __ret_512 = __s0_512 - vmull_u32(vget_high_u32(__s1_512), splat_lane_u32(__s2_512, __p3_512)); \ +#define vmlsq_laneq_f32(__p0_512, __p1_512, __p2_512, __p3_512) __extension__ ({ \ + float32x4_t __ret_512; \ + float32x4_t __s0_512 = __p0_512; \ + float32x4_t __s1_512 = __p1_512; \ + float32x4_t __s2_512 = __p2_512; \ + __ret_512 = __s0_512 - __s1_512 * splatq_laneq_f32(__s2_512, __p3_512); \ __ret_512; \ }) #else -#define vmlsl_high_lane_u32(__p0_513, __p1_513, __p2_513, __p3_513) __extension__ ({ \ - uint64x2_t __ret_513; \ - uint64x2_t __s0_513 = __p0_513; \ - uint32x4_t __s1_513 = __p1_513; \ - uint32x2_t __s2_513 = __p2_513; \ - uint64x2_t __rev0_513; __rev0_513 = __builtin_shufflevector(__s0_513, __s0_513, 1, 0); \ - uint32x4_t __rev1_513; __rev1_513 = __builtin_shufflevector(__s1_513, __s1_513, 3, 2, 1, 0); \ - uint32x2_t __rev2_513; __rev2_513 = __builtin_shufflevector(__s2_513, __s2_513, 1, 0); \ - __ret_513 = __rev0_513 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_513), __noswap_splat_lane_u32(__rev2_513, __p3_513)); \ - __ret_513 = __builtin_shufflevector(__ret_513, __ret_513, 1, 0); \ +#define vmlsq_laneq_f32(__p0_513, __p1_513, __p2_513, __p3_513) __extension__ ({ \ + float32x4_t __ret_513; \ + 
float32x4_t __s0_513 = __p0_513; \ + float32x4_t __s1_513 = __p1_513; \ + float32x4_t __s2_513 = __p2_513; \ + float32x4_t __rev0_513; __rev0_513 = __builtin_shufflevector(__s0_513, __s0_513, __lane_reverse_128_32); \ + float32x4_t __rev1_513; __rev1_513 = __builtin_shufflevector(__s1_513, __s1_513, __lane_reverse_128_32); \ + float32x4_t __rev2_513; __rev2_513 = __builtin_shufflevector(__s2_513, __s2_513, __lane_reverse_128_32); \ + __ret_513 = __rev0_513 - __rev1_513 * __noswap_splatq_laneq_f32(__rev2_513, __p3_513); \ + __ret_513 = __builtin_shufflevector(__ret_513, __ret_513, __lane_reverse_128_32); \ __ret_513; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_u16(__p0_514, __p1_514, __p2_514, __p3_514) __extension__ ({ \ - uint32x4_t __ret_514; \ - uint32x4_t __s0_514 = __p0_514; \ - uint16x8_t __s1_514 = __p1_514; \ - uint16x4_t __s2_514 = __p2_514; \ - __ret_514 = __s0_514 - vmull_u16(vget_high_u16(__s1_514), splat_lane_u16(__s2_514, __p3_514)); \ +#define vmlsq_laneq_s32(__p0_514, __p1_514, __p2_514, __p3_514) __extension__ ({ \ + int32x4_t __ret_514; \ + int32x4_t __s0_514 = __p0_514; \ + int32x4_t __s1_514 = __p1_514; \ + int32x4_t __s2_514 = __p2_514; \ + __ret_514 = __s0_514 - __s1_514 * splatq_laneq_s32(__s2_514, __p3_514); \ __ret_514; \ }) #else -#define vmlsl_high_lane_u16(__p0_515, __p1_515, __p2_515, __p3_515) __extension__ ({ \ - uint32x4_t __ret_515; \ - uint32x4_t __s0_515 = __p0_515; \ - uint16x8_t __s1_515 = __p1_515; \ - uint16x4_t __s2_515 = __p2_515; \ - uint32x4_t __rev0_515; __rev0_515 = __builtin_shufflevector(__s0_515, __s0_515, 3, 2, 1, 0); \ - uint16x8_t __rev1_515; __rev1_515 = __builtin_shufflevector(__s1_515, __s1_515, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_515; __rev2_515 = __builtin_shufflevector(__s2_515, __s2_515, 3, 2, 1, 0); \ - __ret_515 = __rev0_515 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_515), __noswap_splat_lane_u16(__rev2_515, __p3_515)); \ - __ret_515 = __builtin_shufflevector(__ret_515, __ret_515, 3, 2, 1, 0); \ +#define vmlsq_laneq_s32(__p0_515, __p1_515, __p2_515, __p3_515) __extension__ ({ \ + int32x4_t __ret_515; \ + int32x4_t __s0_515 = __p0_515; \ + int32x4_t __s1_515 = __p1_515; \ + int32x4_t __s2_515 = __p2_515; \ + int32x4_t __rev0_515; __rev0_515 = __builtin_shufflevector(__s0_515, __s0_515, __lane_reverse_128_32); \ + int32x4_t __rev1_515; __rev1_515 = __builtin_shufflevector(__s1_515, __s1_515, __lane_reverse_128_32); \ + int32x4_t __rev2_515; __rev2_515 = __builtin_shufflevector(__s2_515, __s2_515, __lane_reverse_128_32); \ + __ret_515 = __rev0_515 - __rev1_515 * __noswap_splatq_laneq_s32(__rev2_515, __p3_515); \ + __ret_515 = __builtin_shufflevector(__ret_515, __ret_515, __lane_reverse_128_32); \ __ret_515; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_s32(__p0_516, __p1_516, __p2_516, __p3_516) __extension__ ({ \ - int64x2_t __ret_516; \ - int64x2_t __s0_516 = __p0_516; \ - int32x4_t __s1_516 = __p1_516; \ - int32x2_t __s2_516 = __p2_516; \ - __ret_516 = __s0_516 - vmull_s32(vget_high_s32(__s1_516), splat_lane_s32(__s2_516, __p3_516)); \ +#define vmlsq_laneq_s16(__p0_516, __p1_516, __p2_516, __p3_516) __extension__ ({ \ + int16x8_t __ret_516; \ + int16x8_t __s0_516 = __p0_516; \ + int16x8_t __s1_516 = __p1_516; \ + int16x8_t __s2_516 = __p2_516; \ + __ret_516 = __s0_516 - __s1_516 * splatq_laneq_s16(__s2_516, __p3_516); \ __ret_516; \ }) #else -#define vmlsl_high_lane_s32(__p0_517, __p1_517, __p2_517, __p3_517) __extension__ ({ \ - int64x2_t __ret_517; \ - int64x2_t __s0_517 = 
__p0_517; \ - int32x4_t __s1_517 = __p1_517; \ - int32x2_t __s2_517 = __p2_517; \ - int64x2_t __rev0_517; __rev0_517 = __builtin_shufflevector(__s0_517, __s0_517, 1, 0); \ - int32x4_t __rev1_517; __rev1_517 = __builtin_shufflevector(__s1_517, __s1_517, 3, 2, 1, 0); \ - int32x2_t __rev2_517; __rev2_517 = __builtin_shufflevector(__s2_517, __s2_517, 1, 0); \ - __ret_517 = __rev0_517 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_517), __noswap_splat_lane_s32(__rev2_517, __p3_517)); \ - __ret_517 = __builtin_shufflevector(__ret_517, __ret_517, 1, 0); \ +#define vmlsq_laneq_s16(__p0_517, __p1_517, __p2_517, __p3_517) __extension__ ({ \ + int16x8_t __ret_517; \ + int16x8_t __s0_517 = __p0_517; \ + int16x8_t __s1_517 = __p1_517; \ + int16x8_t __s2_517 = __p2_517; \ + int16x8_t __rev0_517; __rev0_517 = __builtin_shufflevector(__s0_517, __s0_517, __lane_reverse_128_16); \ + int16x8_t __rev1_517; __rev1_517 = __builtin_shufflevector(__s1_517, __s1_517, __lane_reverse_128_16); \ + int16x8_t __rev2_517; __rev2_517 = __builtin_shufflevector(__s2_517, __s2_517, __lane_reverse_128_16); \ + __ret_517 = __rev0_517 - __rev1_517 * __noswap_splatq_laneq_s16(__rev2_517, __p3_517); \ + __ret_517 = __builtin_shufflevector(__ret_517, __ret_517, __lane_reverse_128_16); \ __ret_517; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_s16(__p0_518, __p1_518, __p2_518, __p3_518) __extension__ ({ \ - int32x4_t __ret_518; \ - int32x4_t __s0_518 = __p0_518; \ - int16x8_t __s1_518 = __p1_518; \ - int16x4_t __s2_518 = __p2_518; \ - __ret_518 = __s0_518 - vmull_s16(vget_high_s16(__s1_518), splat_lane_s16(__s2_518, __p3_518)); \ +#define vmls_laneq_u32(__p0_518, __p1_518, __p2_518, __p3_518) __extension__ ({ \ + uint32x2_t __ret_518; \ + uint32x2_t __s0_518 = __p0_518; \ + uint32x2_t __s1_518 = __p1_518; \ + uint32x4_t __s2_518 = __p2_518; \ + __ret_518 = __s0_518 - __s1_518 * splat_laneq_u32(__s2_518, __p3_518); \ __ret_518; \ }) #else -#define vmlsl_high_lane_s16(__p0_519, __p1_519, __p2_519, __p3_519) __extension__ ({ \ - int32x4_t __ret_519; \ - int32x4_t __s0_519 = __p0_519; \ - int16x8_t __s1_519 = __p1_519; \ - int16x4_t __s2_519 = __p2_519; \ - int32x4_t __rev0_519; __rev0_519 = __builtin_shufflevector(__s0_519, __s0_519, 3, 2, 1, 0); \ - int16x8_t __rev1_519; __rev1_519 = __builtin_shufflevector(__s1_519, __s1_519, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_519; __rev2_519 = __builtin_shufflevector(__s2_519, __s2_519, 3, 2, 1, 0); \ - __ret_519 = __rev0_519 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_519), __noswap_splat_lane_s16(__rev2_519, __p3_519)); \ - __ret_519 = __builtin_shufflevector(__ret_519, __ret_519, 3, 2, 1, 0); \ +#define vmls_laneq_u32(__p0_519, __p1_519, __p2_519, __p3_519) __extension__ ({ \ + uint32x2_t __ret_519; \ + uint32x2_t __s0_519 = __p0_519; \ + uint32x2_t __s1_519 = __p1_519; \ + uint32x4_t __s2_519 = __p2_519; \ + uint32x2_t __rev0_519; __rev0_519 = __builtin_shufflevector(__s0_519, __s0_519, __lane_reverse_64_32); \ + uint32x2_t __rev1_519; __rev1_519 = __builtin_shufflevector(__s1_519, __s1_519, __lane_reverse_64_32); \ + uint32x4_t __rev2_519; __rev2_519 = __builtin_shufflevector(__s2_519, __s2_519, __lane_reverse_128_32); \ + __ret_519 = __rev0_519 - __rev1_519 * __noswap_splat_laneq_u32(__rev2_519, __p3_519); \ + __ret_519 = __builtin_shufflevector(__ret_519, __ret_519, __lane_reverse_64_32); \ __ret_519; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_u32(__p0_520, __p1_520, __p2_520, __p3_520) __extension__ ({ \ - uint64x2_t __ret_520; \ - 
uint64x2_t __s0_520 = __p0_520; \ - uint32x4_t __s1_520 = __p1_520; \ - uint32x4_t __s2_520 = __p2_520; \ - __ret_520 = __s0_520 - vmull_u32(vget_high_u32(__s1_520), splat_laneq_u32(__s2_520, __p3_520)); \ +#define vmls_laneq_u16(__p0_520, __p1_520, __p2_520, __p3_520) __extension__ ({ \ + uint16x4_t __ret_520; \ + uint16x4_t __s0_520 = __p0_520; \ + uint16x4_t __s1_520 = __p1_520; \ + uint16x8_t __s2_520 = __p2_520; \ + __ret_520 = __s0_520 - __s1_520 * splat_laneq_u16(__s2_520, __p3_520); \ __ret_520; \ }) #else -#define vmlsl_high_laneq_u32(__p0_521, __p1_521, __p2_521, __p3_521) __extension__ ({ \ - uint64x2_t __ret_521; \ - uint64x2_t __s0_521 = __p0_521; \ - uint32x4_t __s1_521 = __p1_521; \ - uint32x4_t __s2_521 = __p2_521; \ - uint64x2_t __rev0_521; __rev0_521 = __builtin_shufflevector(__s0_521, __s0_521, 1, 0); \ - uint32x4_t __rev1_521; __rev1_521 = __builtin_shufflevector(__s1_521, __s1_521, 3, 2, 1, 0); \ - uint32x4_t __rev2_521; __rev2_521 = __builtin_shufflevector(__s2_521, __s2_521, 3, 2, 1, 0); \ - __ret_521 = __rev0_521 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_521), __noswap_splat_laneq_u32(__rev2_521, __p3_521)); \ - __ret_521 = __builtin_shufflevector(__ret_521, __ret_521, 1, 0); \ +#define vmls_laneq_u16(__p0_521, __p1_521, __p2_521, __p3_521) __extension__ ({ \ + uint16x4_t __ret_521; \ + uint16x4_t __s0_521 = __p0_521; \ + uint16x4_t __s1_521 = __p1_521; \ + uint16x8_t __s2_521 = __p2_521; \ + uint16x4_t __rev0_521; __rev0_521 = __builtin_shufflevector(__s0_521, __s0_521, __lane_reverse_64_16); \ + uint16x4_t __rev1_521; __rev1_521 = __builtin_shufflevector(__s1_521, __s1_521, __lane_reverse_64_16); \ + uint16x8_t __rev2_521; __rev2_521 = __builtin_shufflevector(__s2_521, __s2_521, __lane_reverse_128_16); \ + __ret_521 = __rev0_521 - __rev1_521 * __noswap_splat_laneq_u16(__rev2_521, __p3_521); \ + __ret_521 = __builtin_shufflevector(__ret_521, __ret_521, __lane_reverse_64_16); \ __ret_521; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_u16(__p0_522, __p1_522, __p2_522, __p3_522) __extension__ ({ \ - uint32x4_t __ret_522; \ - uint32x4_t __s0_522 = __p0_522; \ - uint16x8_t __s1_522 = __p1_522; \ - uint16x8_t __s2_522 = __p2_522; \ - __ret_522 = __s0_522 - vmull_u16(vget_high_u16(__s1_522), splat_laneq_u16(__s2_522, __p3_522)); \ +#define vmls_laneq_f32(__p0_522, __p1_522, __p2_522, __p3_522) __extension__ ({ \ + float32x2_t __ret_522; \ + float32x2_t __s0_522 = __p0_522; \ + float32x2_t __s1_522 = __p1_522; \ + float32x4_t __s2_522 = __p2_522; \ + __ret_522 = __s0_522 - __s1_522 * splat_laneq_f32(__s2_522, __p3_522); \ __ret_522; \ }) #else -#define vmlsl_high_laneq_u16(__p0_523, __p1_523, __p2_523, __p3_523) __extension__ ({ \ - uint32x4_t __ret_523; \ - uint32x4_t __s0_523 = __p0_523; \ - uint16x8_t __s1_523 = __p1_523; \ - uint16x8_t __s2_523 = __p2_523; \ - uint32x4_t __rev0_523; __rev0_523 = __builtin_shufflevector(__s0_523, __s0_523, 3, 2, 1, 0); \ - uint16x8_t __rev1_523; __rev1_523 = __builtin_shufflevector(__s1_523, __s1_523, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_523; __rev2_523 = __builtin_shufflevector(__s2_523, __s2_523, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_523 = __rev0_523 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_523), __noswap_splat_laneq_u16(__rev2_523, __p3_523)); \ - __ret_523 = __builtin_shufflevector(__ret_523, __ret_523, 3, 2, 1, 0); \ +#define vmls_laneq_f32(__p0_523, __p1_523, __p2_523, __p3_523) __extension__ ({ \ + float32x2_t __ret_523; \ + float32x2_t __s0_523 = __p0_523; \ + float32x2_t __s1_523 = 
__p1_523; \ + float32x4_t __s2_523 = __p2_523; \ + float32x2_t __rev0_523; __rev0_523 = __builtin_shufflevector(__s0_523, __s0_523, __lane_reverse_64_32); \ + float32x2_t __rev1_523; __rev1_523 = __builtin_shufflevector(__s1_523, __s1_523, __lane_reverse_64_32); \ + float32x4_t __rev2_523; __rev2_523 = __builtin_shufflevector(__s2_523, __s2_523, __lane_reverse_128_32); \ + __ret_523 = __rev0_523 - __rev1_523 * __noswap_splat_laneq_f32(__rev2_523, __p3_523); \ + __ret_523 = __builtin_shufflevector(__ret_523, __ret_523, __lane_reverse_64_32); \ __ret_523; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_s32(__p0_524, __p1_524, __p2_524, __p3_524) __extension__ ({ \ - int64x2_t __ret_524; \ - int64x2_t __s0_524 = __p0_524; \ - int32x4_t __s1_524 = __p1_524; \ +#define vmls_laneq_s32(__p0_524, __p1_524, __p2_524, __p3_524) __extension__ ({ \ + int32x2_t __ret_524; \ + int32x2_t __s0_524 = __p0_524; \ + int32x2_t __s1_524 = __p1_524; \ int32x4_t __s2_524 = __p2_524; \ - __ret_524 = __s0_524 - vmull_s32(vget_high_s32(__s1_524), splat_laneq_s32(__s2_524, __p3_524)); \ + __ret_524 = __s0_524 - __s1_524 * splat_laneq_s32(__s2_524, __p3_524); \ __ret_524; \ }) #else -#define vmlsl_high_laneq_s32(__p0_525, __p1_525, __p2_525, __p3_525) __extension__ ({ \ - int64x2_t __ret_525; \ - int64x2_t __s0_525 = __p0_525; \ - int32x4_t __s1_525 = __p1_525; \ +#define vmls_laneq_s32(__p0_525, __p1_525, __p2_525, __p3_525) __extension__ ({ \ + int32x2_t __ret_525; \ + int32x2_t __s0_525 = __p0_525; \ + int32x2_t __s1_525 = __p1_525; \ int32x4_t __s2_525 = __p2_525; \ - int64x2_t __rev0_525; __rev0_525 = __builtin_shufflevector(__s0_525, __s0_525, 1, 0); \ - int32x4_t __rev1_525; __rev1_525 = __builtin_shufflevector(__s1_525, __s1_525, 3, 2, 1, 0); \ - int32x4_t __rev2_525; __rev2_525 = __builtin_shufflevector(__s2_525, __s2_525, 3, 2, 1, 0); \ - __ret_525 = __rev0_525 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_525), __noswap_splat_laneq_s32(__rev2_525, __p3_525)); \ - __ret_525 = __builtin_shufflevector(__ret_525, __ret_525, 1, 0); \ + int32x2_t __rev0_525; __rev0_525 = __builtin_shufflevector(__s0_525, __s0_525, __lane_reverse_64_32); \ + int32x2_t __rev1_525; __rev1_525 = __builtin_shufflevector(__s1_525, __s1_525, __lane_reverse_64_32); \ + int32x4_t __rev2_525; __rev2_525 = __builtin_shufflevector(__s2_525, __s2_525, __lane_reverse_128_32); \ + __ret_525 = __rev0_525 - __rev1_525 * __noswap_splat_laneq_s32(__rev2_525, __p3_525); \ + __ret_525 = __builtin_shufflevector(__ret_525, __ret_525, __lane_reverse_64_32); \ __ret_525; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_s16(__p0_526, __p1_526, __p2_526, __p3_526) __extension__ ({ \ - int32x4_t __ret_526; \ - int32x4_t __s0_526 = __p0_526; \ - int16x8_t __s1_526 = __p1_526; \ +#define vmls_laneq_s16(__p0_526, __p1_526, __p2_526, __p3_526) __extension__ ({ \ + int16x4_t __ret_526; \ + int16x4_t __s0_526 = __p0_526; \ + int16x4_t __s1_526 = __p1_526; \ int16x8_t __s2_526 = __p2_526; \ - __ret_526 = __s0_526 - vmull_s16(vget_high_s16(__s1_526), splat_laneq_s16(__s2_526, __p3_526)); \ + __ret_526 = __s0_526 - __s1_526 * splat_laneq_s16(__s2_526, __p3_526); \ __ret_526; \ }) #else -#define vmlsl_high_laneq_s16(__p0_527, __p1_527, __p2_527, __p3_527) __extension__ ({ \ - int32x4_t __ret_527; \ - int32x4_t __s0_527 = __p0_527; \ - int16x8_t __s1_527 = __p1_527; \ +#define vmls_laneq_s16(__p0_527, __p1_527, __p2_527, __p3_527) __extension__ ({ \ + int16x4_t __ret_527; \ + int16x4_t __s0_527 = __p0_527; \ + int16x4_t __s1_527 = 
__p1_527; \ int16x8_t __s2_527 = __p2_527; \ - int32x4_t __rev0_527; __rev0_527 = __builtin_shufflevector(__s0_527, __s0_527, 3, 2, 1, 0); \ - int16x8_t __rev1_527; __rev1_527 = __builtin_shufflevector(__s1_527, __s1_527, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_527; __rev2_527 = __builtin_shufflevector(__s2_527, __s2_527, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_527 = __rev0_527 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_527), __noswap_splat_laneq_s16(__rev2_527, __p3_527)); \ - __ret_527 = __builtin_shufflevector(__ret_527, __ret_527, 3, 2, 1, 0); \ + int16x4_t __rev0_527; __rev0_527 = __builtin_shufflevector(__s0_527, __s0_527, __lane_reverse_64_16); \ + int16x4_t __rev1_527; __rev1_527 = __builtin_shufflevector(__s1_527, __s1_527, __lane_reverse_64_16); \ + int16x8_t __rev2_527; __rev2_527 = __builtin_shufflevector(__s2_527, __s2_527, __lane_reverse_128_16); \ + __ret_527 = __rev0_527 - __rev1_527 * __noswap_splat_laneq_s16(__rev2_527, __p3_527); \ + __ret_527 = __builtin_shufflevector(__ret_527, __ret_527, __lane_reverse_64_16); \ __ret_527; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_u32(__p0_528, __p1_528, __p2_528, __p3_528) __extension__ ({ \ +#define vmlsl_high_lane_u32(__p0_528, __p1_528, __p2_528, __p3_528) __extension__ ({ \ uint64x2_t __ret_528; \ uint64x2_t __s0_528 = __p0_528; \ - uint32x2_t __s1_528 = __p1_528; \ - uint32x4_t __s2_528 = __p2_528; \ - __ret_528 = __s0_528 - vmull_u32(__s1_528, splat_laneq_u32(__s2_528, __p3_528)); \ + uint32x4_t __s1_528 = __p1_528; \ + uint32x2_t __s2_528 = __p2_528; \ + __ret_528 = __s0_528 - vmull_u32(vget_high_u32(__s1_528), splat_lane_u32(__s2_528, __p3_528)); \ __ret_528; \ }) #else -#define vmlsl_laneq_u32(__p0_529, __p1_529, __p2_529, __p3_529) __extension__ ({ \ +#define vmlsl_high_lane_u32(__p0_529, __p1_529, __p2_529, __p3_529) __extension__ ({ \ uint64x2_t __ret_529; \ uint64x2_t __s0_529 = __p0_529; \ - uint32x2_t __s1_529 = __p1_529; \ - uint32x4_t __s2_529 = __p2_529; \ - uint64x2_t __rev0_529; __rev0_529 = __builtin_shufflevector(__s0_529, __s0_529, 1, 0); \ - uint32x2_t __rev1_529; __rev1_529 = __builtin_shufflevector(__s1_529, __s1_529, 1, 0); \ - uint32x4_t __rev2_529; __rev2_529 = __builtin_shufflevector(__s2_529, __s2_529, 3, 2, 1, 0); \ - __ret_529 = __rev0_529 - __noswap_vmull_u32(__rev1_529, __noswap_splat_laneq_u32(__rev2_529, __p3_529)); \ - __ret_529 = __builtin_shufflevector(__ret_529, __ret_529, 1, 0); \ + uint32x4_t __s1_529 = __p1_529; \ + uint32x2_t __s2_529 = __p2_529; \ + uint64x2_t __rev0_529; __rev0_529 = __builtin_shufflevector(__s0_529, __s0_529, __lane_reverse_128_64); \ + uint32x4_t __rev1_529; __rev1_529 = __builtin_shufflevector(__s1_529, __s1_529, __lane_reverse_128_32); \ + uint32x2_t __rev2_529; __rev2_529 = __builtin_shufflevector(__s2_529, __s2_529, __lane_reverse_64_32); \ + __ret_529 = __rev0_529 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_529), __noswap_splat_lane_u32(__rev2_529, __p3_529)); \ + __ret_529 = __builtin_shufflevector(__ret_529, __ret_529, __lane_reverse_128_64); \ __ret_529; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_u16(__p0_530, __p1_530, __p2_530, __p3_530) __extension__ ({ \ +#define vmlsl_high_lane_u16(__p0_530, __p1_530, __p2_530, __p3_530) __extension__ ({ \ uint32x4_t __ret_530; \ uint32x4_t __s0_530 = __p0_530; \ - uint16x4_t __s1_530 = __p1_530; \ - uint16x8_t __s2_530 = __p2_530; \ - __ret_530 = __s0_530 - vmull_u16(__s1_530, splat_laneq_u16(__s2_530, __p3_530)); \ + uint16x8_t __s1_530 = __p1_530; \ + uint16x4_t __s2_530 = 
__p2_530; \ + __ret_530 = __s0_530 - vmull_u16(vget_high_u16(__s1_530), splat_lane_u16(__s2_530, __p3_530)); \ __ret_530; \ }) #else -#define vmlsl_laneq_u16(__p0_531, __p1_531, __p2_531, __p3_531) __extension__ ({ \ +#define vmlsl_high_lane_u16(__p0_531, __p1_531, __p2_531, __p3_531) __extension__ ({ \ uint32x4_t __ret_531; \ uint32x4_t __s0_531 = __p0_531; \ - uint16x4_t __s1_531 = __p1_531; \ - uint16x8_t __s2_531 = __p2_531; \ - uint32x4_t __rev0_531; __rev0_531 = __builtin_shufflevector(__s0_531, __s0_531, 3, 2, 1, 0); \ - uint16x4_t __rev1_531; __rev1_531 = __builtin_shufflevector(__s1_531, __s1_531, 3, 2, 1, 0); \ - uint16x8_t __rev2_531; __rev2_531 = __builtin_shufflevector(__s2_531, __s2_531, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_531 = __rev0_531 - __noswap_vmull_u16(__rev1_531, __noswap_splat_laneq_u16(__rev2_531, __p3_531)); \ - __ret_531 = __builtin_shufflevector(__ret_531, __ret_531, 3, 2, 1, 0); \ + uint16x8_t __s1_531 = __p1_531; \ + uint16x4_t __s2_531 = __p2_531; \ + uint32x4_t __rev0_531; __rev0_531 = __builtin_shufflevector(__s0_531, __s0_531, __lane_reverse_128_32); \ + uint16x8_t __rev1_531; __rev1_531 = __builtin_shufflevector(__s1_531, __s1_531, __lane_reverse_128_16); \ + uint16x4_t __rev2_531; __rev2_531 = __builtin_shufflevector(__s2_531, __s2_531, __lane_reverse_64_16); \ + __ret_531 = __rev0_531 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_531), __noswap_splat_lane_u16(__rev2_531, __p3_531)); \ + __ret_531 = __builtin_shufflevector(__ret_531, __ret_531, __lane_reverse_128_32); \ __ret_531; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_s32(__p0_532, __p1_532, __p2_532, __p3_532) __extension__ ({ \ +#define vmlsl_high_lane_s32(__p0_532, __p1_532, __p2_532, __p3_532) __extension__ ({ \ int64x2_t __ret_532; \ int64x2_t __s0_532 = __p0_532; \ - int32x2_t __s1_532 = __p1_532; \ - int32x4_t __s2_532 = __p2_532; \ - __ret_532 = __s0_532 - vmull_s32(__s1_532, splat_laneq_s32(__s2_532, __p3_532)); \ + int32x4_t __s1_532 = __p1_532; \ + int32x2_t __s2_532 = __p2_532; \ + __ret_532 = __s0_532 - vmull_s32(vget_high_s32(__s1_532), splat_lane_s32(__s2_532, __p3_532)); \ __ret_532; \ }) #else -#define vmlsl_laneq_s32(__p0_533, __p1_533, __p2_533, __p3_533) __extension__ ({ \ +#define vmlsl_high_lane_s32(__p0_533, __p1_533, __p2_533, __p3_533) __extension__ ({ \ int64x2_t __ret_533; \ int64x2_t __s0_533 = __p0_533; \ - int32x2_t __s1_533 = __p1_533; \ - int32x4_t __s2_533 = __p2_533; \ - int64x2_t __rev0_533; __rev0_533 = __builtin_shufflevector(__s0_533, __s0_533, 1, 0); \ - int32x2_t __rev1_533; __rev1_533 = __builtin_shufflevector(__s1_533, __s1_533, 1, 0); \ - int32x4_t __rev2_533; __rev2_533 = __builtin_shufflevector(__s2_533, __s2_533, 3, 2, 1, 0); \ - __ret_533 = __rev0_533 - __noswap_vmull_s32(__rev1_533, __noswap_splat_laneq_s32(__rev2_533, __p3_533)); \ - __ret_533 = __builtin_shufflevector(__ret_533, __ret_533, 1, 0); \ + int32x4_t __s1_533 = __p1_533; \ + int32x2_t __s2_533 = __p2_533; \ + int64x2_t __rev0_533; __rev0_533 = __builtin_shufflevector(__s0_533, __s0_533, __lane_reverse_128_64); \ + int32x4_t __rev1_533; __rev1_533 = __builtin_shufflevector(__s1_533, __s1_533, __lane_reverse_128_32); \ + int32x2_t __rev2_533; __rev2_533 = __builtin_shufflevector(__s2_533, __s2_533, __lane_reverse_64_32); \ + __ret_533 = __rev0_533 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_533), __noswap_splat_lane_s32(__rev2_533, __p3_533)); \ + __ret_533 = __builtin_shufflevector(__ret_533, __ret_533, __lane_reverse_128_64); \ __ret_533; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vmlsl_laneq_s16(__p0_534, __p1_534, __p2_534, __p3_534) __extension__ ({ \ +#define vmlsl_high_lane_s16(__p0_534, __p1_534, __p2_534, __p3_534) __extension__ ({ \ int32x4_t __ret_534; \ int32x4_t __s0_534 = __p0_534; \ - int16x4_t __s1_534 = __p1_534; \ - int16x8_t __s2_534 = __p2_534; \ - __ret_534 = __s0_534 - vmull_s16(__s1_534, splat_laneq_s16(__s2_534, __p3_534)); \ + int16x8_t __s1_534 = __p1_534; \ + int16x4_t __s2_534 = __p2_534; \ + __ret_534 = __s0_534 - vmull_s16(vget_high_s16(__s1_534), splat_lane_s16(__s2_534, __p3_534)); \ __ret_534; \ }) #else -#define vmlsl_laneq_s16(__p0_535, __p1_535, __p2_535, __p3_535) __extension__ ({ \ +#define vmlsl_high_lane_s16(__p0_535, __p1_535, __p2_535, __p3_535) __extension__ ({ \ int32x4_t __ret_535; \ int32x4_t __s0_535 = __p0_535; \ - int16x4_t __s1_535 = __p1_535; \ - int16x8_t __s2_535 = __p2_535; \ - int32x4_t __rev0_535; __rev0_535 = __builtin_shufflevector(__s0_535, __s0_535, 3, 2, 1, 0); \ - int16x4_t __rev1_535; __rev1_535 = __builtin_shufflevector(__s1_535, __s1_535, 3, 2, 1, 0); \ - int16x8_t __rev2_535; __rev2_535 = __builtin_shufflevector(__s2_535, __s2_535, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_535 = __rev0_535 - __noswap_vmull_s16(__rev1_535, __noswap_splat_laneq_s16(__rev2_535, __p3_535)); \ - __ret_535 = __builtin_shufflevector(__ret_535, __ret_535, 3, 2, 1, 0); \ + int16x8_t __s1_535 = __p1_535; \ + int16x4_t __s2_535 = __p2_535; \ + int32x4_t __rev0_535; __rev0_535 = __builtin_shufflevector(__s0_535, __s0_535, __lane_reverse_128_32); \ + int16x8_t __rev1_535; __rev1_535 = __builtin_shufflevector(__s1_535, __s1_535, __lane_reverse_128_16); \ + int16x4_t __rev2_535; __rev2_535 = __builtin_shufflevector(__s2_535, __s2_535, __lane_reverse_64_16); \ + __ret_535 = __rev0_535 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_535), __noswap_splat_lane_s16(__rev2_535, __p3_535)); \ + __ret_535 = __builtin_shufflevector(__ret_535, __ret_535, __lane_reverse_128_32); \ __ret_535; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_high_laneq_u32(__p0_536, __p1_536, __p2_536, __p3_536) __extension__ ({ \ + uint64x2_t __ret_536; \ + uint64x2_t __s0_536 = __p0_536; \ + uint32x4_t __s1_536 = __p1_536; \ + uint32x4_t __s2_536 = __p2_536; \ + __ret_536 = __s0_536 - vmull_u32(vget_high_u32(__s1_536), splat_laneq_u32(__s2_536, __p3_536)); \ + __ret_536; \ +}) +#else +#define vmlsl_high_laneq_u32(__p0_537, __p1_537, __p2_537, __p3_537) __extension__ ({ \ + uint64x2_t __ret_537; \ + uint64x2_t __s0_537 = __p0_537; \ + uint32x4_t __s1_537 = __p1_537; \ + uint32x4_t __s2_537 = __p2_537; \ + uint64x2_t __rev0_537; __rev0_537 = __builtin_shufflevector(__s0_537, __s0_537, __lane_reverse_128_64); \ + uint32x4_t __rev1_537; __rev1_537 = __builtin_shufflevector(__s1_537, __s1_537, __lane_reverse_128_32); \ + uint32x4_t __rev2_537; __rev2_537 = __builtin_shufflevector(__s2_537, __s2_537, __lane_reverse_128_32); \ + __ret_537 = __rev0_537 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_537), __noswap_splat_laneq_u32(__rev2_537, __p3_537)); \ + __ret_537 = __builtin_shufflevector(__ret_537, __ret_537, __lane_reverse_128_64); \ + __ret_537; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_high_laneq_u16(__p0_538, __p1_538, __p2_538, __p3_538) __extension__ ({ \ + uint32x4_t __ret_538; \ + uint32x4_t __s0_538 = __p0_538; \ + uint16x8_t __s1_538 = __p1_538; \ + uint16x8_t __s2_538 = __p2_538; \ + __ret_538 = __s0_538 - vmull_u16(vget_high_u16(__s1_538), splat_laneq_u16(__s2_538, __p3_538)); \ + __ret_538; \ +}) +#else 
+#define vmlsl_high_laneq_u16(__p0_539, __p1_539, __p2_539, __p3_539) __extension__ ({ \ + uint32x4_t __ret_539; \ + uint32x4_t __s0_539 = __p0_539; \ + uint16x8_t __s1_539 = __p1_539; \ + uint16x8_t __s2_539 = __p2_539; \ + uint32x4_t __rev0_539; __rev0_539 = __builtin_shufflevector(__s0_539, __s0_539, __lane_reverse_128_32); \ + uint16x8_t __rev1_539; __rev1_539 = __builtin_shufflevector(__s1_539, __s1_539, __lane_reverse_128_16); \ + uint16x8_t __rev2_539; __rev2_539 = __builtin_shufflevector(__s2_539, __s2_539, __lane_reverse_128_16); \ + __ret_539 = __rev0_539 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_539), __noswap_splat_laneq_u16(__rev2_539, __p3_539)); \ + __ret_539 = __builtin_shufflevector(__ret_539, __ret_539, __lane_reverse_128_32); \ + __ret_539; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_high_laneq_s32(__p0_540, __p1_540, __p2_540, __p3_540) __extension__ ({ \ + int64x2_t __ret_540; \ + int64x2_t __s0_540 = __p0_540; \ + int32x4_t __s1_540 = __p1_540; \ + int32x4_t __s2_540 = __p2_540; \ + __ret_540 = __s0_540 - vmull_s32(vget_high_s32(__s1_540), splat_laneq_s32(__s2_540, __p3_540)); \ + __ret_540; \ +}) +#else +#define vmlsl_high_laneq_s32(__p0_541, __p1_541, __p2_541, __p3_541) __extension__ ({ \ + int64x2_t __ret_541; \ + int64x2_t __s0_541 = __p0_541; \ + int32x4_t __s1_541 = __p1_541; \ + int32x4_t __s2_541 = __p2_541; \ + int64x2_t __rev0_541; __rev0_541 = __builtin_shufflevector(__s0_541, __s0_541, __lane_reverse_128_64); \ + int32x4_t __rev1_541; __rev1_541 = __builtin_shufflevector(__s1_541, __s1_541, __lane_reverse_128_32); \ + int32x4_t __rev2_541; __rev2_541 = __builtin_shufflevector(__s2_541, __s2_541, __lane_reverse_128_32); \ + __ret_541 = __rev0_541 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_541), __noswap_splat_laneq_s32(__rev2_541, __p3_541)); \ + __ret_541 = __builtin_shufflevector(__ret_541, __ret_541, __lane_reverse_128_64); \ + __ret_541; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_high_laneq_s16(__p0_542, __p1_542, __p2_542, __p3_542) __extension__ ({ \ + int32x4_t __ret_542; \ + int32x4_t __s0_542 = __p0_542; \ + int16x8_t __s1_542 = __p1_542; \ + int16x8_t __s2_542 = __p2_542; \ + __ret_542 = __s0_542 - vmull_s16(vget_high_s16(__s1_542), splat_laneq_s16(__s2_542, __p3_542)); \ + __ret_542; \ +}) +#else +#define vmlsl_high_laneq_s16(__p0_543, __p1_543, __p2_543, __p3_543) __extension__ ({ \ + int32x4_t __ret_543; \ + int32x4_t __s0_543 = __p0_543; \ + int16x8_t __s1_543 = __p1_543; \ + int16x8_t __s2_543 = __p2_543; \ + int32x4_t __rev0_543; __rev0_543 = __builtin_shufflevector(__s0_543, __s0_543, __lane_reverse_128_32); \ + int16x8_t __rev1_543; __rev1_543 = __builtin_shufflevector(__s1_543, __s1_543, __lane_reverse_128_16); \ + int16x8_t __rev2_543; __rev2_543 = __builtin_shufflevector(__s2_543, __s2_543, __lane_reverse_128_16); \ + __ret_543 = __rev0_543 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_543), __noswap_splat_laneq_s16(__rev2_543, __p3_543)); \ + __ret_543 = __builtin_shufflevector(__ret_543, __ret_543, __lane_reverse_128_32); \ + __ret_543; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_laneq_u32(__p0_544, __p1_544, __p2_544, __p3_544) __extension__ ({ \ + uint64x2_t __ret_544; \ + uint64x2_t __s0_544 = __p0_544; \ + uint32x2_t __s1_544 = __p1_544; \ + uint32x4_t __s2_544 = __p2_544; \ + __ret_544 = __s0_544 - vmull_u32(__s1_544, splat_laneq_u32(__s2_544, __p3_544)); \ + __ret_544; \ +}) +#else +#define vmlsl_laneq_u32(__p0_545, __p1_545, __p2_545, __p3_545) __extension__ ({ \ + 
uint64x2_t __ret_545; \ + uint64x2_t __s0_545 = __p0_545; \ + uint32x2_t __s1_545 = __p1_545; \ + uint32x4_t __s2_545 = __p2_545; \ + uint64x2_t __rev0_545; __rev0_545 = __builtin_shufflevector(__s0_545, __s0_545, __lane_reverse_128_64); \ + uint32x2_t __rev1_545; __rev1_545 = __builtin_shufflevector(__s1_545, __s1_545, __lane_reverse_64_32); \ + uint32x4_t __rev2_545; __rev2_545 = __builtin_shufflevector(__s2_545, __s2_545, __lane_reverse_128_32); \ + __ret_545 = __rev0_545 - __noswap_vmull_u32(__rev1_545, __noswap_splat_laneq_u32(__rev2_545, __p3_545)); \ + __ret_545 = __builtin_shufflevector(__ret_545, __ret_545, __lane_reverse_128_64); \ + __ret_545; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_laneq_u16(__p0_546, __p1_546, __p2_546, __p3_546) __extension__ ({ \ + uint32x4_t __ret_546; \ + uint32x4_t __s0_546 = __p0_546; \ + uint16x4_t __s1_546 = __p1_546; \ + uint16x8_t __s2_546 = __p2_546; \ + __ret_546 = __s0_546 - vmull_u16(__s1_546, splat_laneq_u16(__s2_546, __p3_546)); \ + __ret_546; \ +}) +#else +#define vmlsl_laneq_u16(__p0_547, __p1_547, __p2_547, __p3_547) __extension__ ({ \ + uint32x4_t __ret_547; \ + uint32x4_t __s0_547 = __p0_547; \ + uint16x4_t __s1_547 = __p1_547; \ + uint16x8_t __s2_547 = __p2_547; \ + uint32x4_t __rev0_547; __rev0_547 = __builtin_shufflevector(__s0_547, __s0_547, __lane_reverse_128_32); \ + uint16x4_t __rev1_547; __rev1_547 = __builtin_shufflevector(__s1_547, __s1_547, __lane_reverse_64_16); \ + uint16x8_t __rev2_547; __rev2_547 = __builtin_shufflevector(__s2_547, __s2_547, __lane_reverse_128_16); \ + __ret_547 = __rev0_547 - __noswap_vmull_u16(__rev1_547, __noswap_splat_laneq_u16(__rev2_547, __p3_547)); \ + __ret_547 = __builtin_shufflevector(__ret_547, __ret_547, __lane_reverse_128_32); \ + __ret_547; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_laneq_s32(__p0_548, __p1_548, __p2_548, __p3_548) __extension__ ({ \ + int64x2_t __ret_548; \ + int64x2_t __s0_548 = __p0_548; \ + int32x2_t __s1_548 = __p1_548; \ + int32x4_t __s2_548 = __p2_548; \ + __ret_548 = __s0_548 - vmull_s32(__s1_548, splat_laneq_s32(__s2_548, __p3_548)); \ + __ret_548; \ +}) +#else +#define vmlsl_laneq_s32(__p0_549, __p1_549, __p2_549, __p3_549) __extension__ ({ \ + int64x2_t __ret_549; \ + int64x2_t __s0_549 = __p0_549; \ + int32x2_t __s1_549 = __p1_549; \ + int32x4_t __s2_549 = __p2_549; \ + int64x2_t __rev0_549; __rev0_549 = __builtin_shufflevector(__s0_549, __s0_549, __lane_reverse_128_64); \ + int32x2_t __rev1_549; __rev1_549 = __builtin_shufflevector(__s1_549, __s1_549, __lane_reverse_64_32); \ + int32x4_t __rev2_549; __rev2_549 = __builtin_shufflevector(__s2_549, __s2_549, __lane_reverse_128_32); \ + __ret_549 = __rev0_549 - __noswap_vmull_s32(__rev1_549, __noswap_splat_laneq_s32(__rev2_549, __p3_549)); \ + __ret_549 = __builtin_shufflevector(__ret_549, __ret_549, __lane_reverse_128_64); \ + __ret_549; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlsl_laneq_s16(__p0_550, __p1_550, __p2_550, __p3_550) __extension__ ({ \ + int32x4_t __ret_550; \ + int32x4_t __s0_550 = __p0_550; \ + int16x4_t __s1_550 = __p1_550; \ + int16x8_t __s2_550 = __p2_550; \ + __ret_550 = __s0_550 - vmull_s16(__s1_550, splat_laneq_s16(__s2_550, __p3_550)); \ + __ret_550; \ +}) +#else +#define vmlsl_laneq_s16(__p0_551, __p1_551, __p2_551, __p3_551) __extension__ ({ \ + int32x4_t __ret_551; \ + int32x4_t __s0_551 = __p0_551; \ + int16x4_t __s1_551 = __p1_551; \ + int16x8_t __s2_551 = __p2_551; \ + int32x4_t __rev0_551; __rev0_551 = __builtin_shufflevector(__s0_551, __s0_551, 
__lane_reverse_128_32); \ + int16x4_t __rev1_551; __rev1_551 = __builtin_shufflevector(__s1_551, __s1_551, __lane_reverse_64_16); \ + int16x8_t __rev2_551; __rev2_551 = __builtin_shufflevector(__s2_551, __s2_551, __lane_reverse_128_16); \ + __ret_551 = __rev0_551 - __noswap_vmull_s16(__rev1_551, __noswap_splat_laneq_s16(__rev2_551, __p3_551)); \ + __ret_551 = __builtin_shufflevector(__ret_551, __ret_551, __lane_reverse_128_32); \ + __ret_551; \ +}) +#endif + __ai __attribute__((target("neon"))) poly64x1_t vmov_n_p64(poly64_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t) {__p0}; @@ -54028,7 +55686,7 @@ __ai __attribute__((target("neon"))) poly64x2_t vmovq_n_p64(poly64_t __p0) { __ai __attribute__((target("neon"))) poly64x2_t vmovq_n_p64(poly64_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -54043,7 +55701,7 @@ __ai __attribute__((target("neon"))) float64x2_t vmovq_n_f64(float64_t __p0) { __ai __attribute__((target("neon"))) float64x2_t vmovq_n_f64(float64_t __p0) { float64x2_t __ret; __ret = (float64x2_t) {__p0, __p0}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -54054,147 +55712,147 @@ __ai __attribute__((target("neon"))) float64x1_t vmov_n_f64(float64_t __p0) { return __ret; } #ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("neon"))) uint16x8_t vmovl_high_u8(uint8x16_t __p0_536) { - uint16x8_t __ret_536; - uint8x8_t __a1_536 = vget_high_u8(__p0_536); - __ret_536 = (uint16x8_t)(vshll_n_u8(__a1_536, 0)); - return __ret_536; -} -#else -__ai __attribute__((target("neon"))) uint16x8_t vmovl_high_u8(uint8x16_t __p0_537) { - uint16x8_t __ret_537; - uint8x16_t __rev0_537; __rev0_537 = __builtin_shufflevector(__p0_537, __p0_537, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __a1_537 = __noswap_vget_high_u8(__rev0_537); - __ret_537 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_537, 0)); - __ret_537 = __builtin_shufflevector(__ret_537, __ret_537, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret_537; -} -__ai __attribute__((target("neon"))) uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_538) { - uint16x8_t __ret_538; - uint8x8_t __a1_538 = __noswap_vget_high_u8(__p0_538); - __ret_538 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_538, 0)); - return __ret_538; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("neon"))) uint64x2_t vmovl_high_u32(uint32x4_t __p0_539) { - uint64x2_t __ret_539; - uint32x2_t __a1_539 = vget_high_u32(__p0_539); - __ret_539 = (uint64x2_t)(vshll_n_u32(__a1_539, 0)); - return __ret_539; -} -#else -__ai __attribute__((target("neon"))) uint64x2_t vmovl_high_u32(uint32x4_t __p0_540) { - uint64x2_t __ret_540; - uint32x4_t __rev0_540; __rev0_540 = __builtin_shufflevector(__p0_540, __p0_540, 3, 2, 1, 0); - uint32x2_t __a1_540 = __noswap_vget_high_u32(__rev0_540); - __ret_540 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_540, 0)); - __ret_540 = __builtin_shufflevector(__ret_540, __ret_540, 1, 0); - return __ret_540; -} -__ai __attribute__((target("neon"))) uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_541) { - uint64x2_t __ret_541; - uint32x2_t __a1_541 = __noswap_vget_high_u32(__p0_541); - __ret_541 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_541, 0)); - return __ret_541; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("neon"))) uint32x4_t vmovl_high_u16(uint16x8_t 
__p0_542) { - uint32x4_t __ret_542; - uint16x4_t __a1_542 = vget_high_u16(__p0_542); - __ret_542 = (uint32x4_t)(vshll_n_u16(__a1_542, 0)); - return __ret_542; -} -#else -__ai __attribute__((target("neon"))) uint32x4_t vmovl_high_u16(uint16x8_t __p0_543) { - uint32x4_t __ret_543; - uint16x8_t __rev0_543; __rev0_543 = __builtin_shufflevector(__p0_543, __p0_543, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x4_t __a1_543 = __noswap_vget_high_u16(__rev0_543); - __ret_543 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_543, 0)); - __ret_543 = __builtin_shufflevector(__ret_543, __ret_543, 3, 2, 1, 0); - return __ret_543; -} -__ai __attribute__((target("neon"))) uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_544) { - uint32x4_t __ret_544; - uint16x4_t __a1_544 = __noswap_vget_high_u16(__p0_544); - __ret_544 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_544, 0)); - return __ret_544; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("neon"))) int16x8_t vmovl_high_s8(int8x16_t __p0_545) { - int16x8_t __ret_545; - int8x8_t __a1_545 = vget_high_s8(__p0_545); - __ret_545 = (int16x8_t)(vshll_n_s8(__a1_545, 0)); - return __ret_545; -} -#else -__ai __attribute__((target("neon"))) int16x8_t vmovl_high_s8(int8x16_t __p0_546) { - int16x8_t __ret_546; - int8x16_t __rev0_546; __rev0_546 = __builtin_shufflevector(__p0_546, __p0_546, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __a1_546 = __noswap_vget_high_s8(__rev0_546); - __ret_546 = (int16x8_t)(__noswap_vshll_n_s8(__a1_546, 0)); - __ret_546 = __builtin_shufflevector(__ret_546, __ret_546, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret_546; -} -__ai __attribute__((target("neon"))) int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_547) { - int16x8_t __ret_547; - int8x8_t __a1_547 = __noswap_vget_high_s8(__p0_547); - __ret_547 = (int16x8_t)(__noswap_vshll_n_s8(__a1_547, 0)); - return __ret_547; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("neon"))) int64x2_t vmovl_high_s32(int32x4_t __p0_548) { - int64x2_t __ret_548; - int32x2_t __a1_548 = vget_high_s32(__p0_548); - __ret_548 = (int64x2_t)(vshll_n_s32(__a1_548, 0)); - return __ret_548; -} -#else -__ai __attribute__((target("neon"))) int64x2_t vmovl_high_s32(int32x4_t __p0_549) { - int64x2_t __ret_549; - int32x4_t __rev0_549; __rev0_549 = __builtin_shufflevector(__p0_549, __p0_549, 3, 2, 1, 0); - int32x2_t __a1_549 = __noswap_vget_high_s32(__rev0_549); - __ret_549 = (int64x2_t)(__noswap_vshll_n_s32(__a1_549, 0)); - __ret_549 = __builtin_shufflevector(__ret_549, __ret_549, 1, 0); - return __ret_549; -} -__ai __attribute__((target("neon"))) int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_550) { - int64x2_t __ret_550; - int32x2_t __a1_550 = __noswap_vget_high_s32(__p0_550); - __ret_550 = (int64x2_t)(__noswap_vshll_n_s32(__a1_550, 0)); - return __ret_550; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("neon"))) int32x4_t vmovl_high_s16(int16x8_t __p0_551) { - int32x4_t __ret_551; - int16x4_t __a1_551 = vget_high_s16(__p0_551); - __ret_551 = (int32x4_t)(vshll_n_s16(__a1_551, 0)); - return __ret_551; -} -#else -__ai __attribute__((target("neon"))) int32x4_t vmovl_high_s16(int16x8_t __p0_552) { - int32x4_t __ret_552; - int16x8_t __rev0_552; __rev0_552 = __builtin_shufflevector(__p0_552, __p0_552, 7, 6, 5, 4, 3, 2, 1, 0); - int16x4_t __a1_552 = __noswap_vget_high_s16(__rev0_552); - __ret_552 = (int32x4_t)(__noswap_vshll_n_s16(__a1_552, 0)); - __ret_552 = __builtin_shufflevector(__ret_552, __ret_552, 3, 2, 1, 0); +__ai __attribute__((target("neon"))) uint16x8_t 
vmovl_high_u8(uint8x16_t __p0_552) { + uint16x8_t __ret_552; + uint8x8_t __a1_552 = vget_high_u8(__p0_552); + __ret_552 = __builtin_bit_cast(uint16x8_t, vshll_n_u8(__a1_552, 0)); return __ret_552; } -__ai __attribute__((target("neon"))) int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_553) { - int32x4_t __ret_553; - int16x4_t __a1_553 = __noswap_vget_high_s16(__p0_553); - __ret_553 = (int32x4_t)(__noswap_vshll_n_s16(__a1_553, 0)); +#else +__ai __attribute__((target("neon"))) uint16x8_t vmovl_high_u8(uint8x16_t __p0_553) { + uint16x8_t __ret_553; + uint8x16_t __rev0_553; __rev0_553 = __builtin_shufflevector(__p0_553, __p0_553, __lane_reverse_128_8); + uint8x8_t __a1_553 = __noswap_vget_high_u8(__rev0_553); + __ret_553 = __builtin_bit_cast(uint16x8_t, __noswap_vshll_n_u8(__a1_553, 0)); + __ret_553 = __builtin_shufflevector(__ret_553, __ret_553, __lane_reverse_128_16); return __ret_553; } +__ai __attribute__((target("neon"))) uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_554) { + uint16x8_t __ret_554; + uint8x8_t __a1_554 = __noswap_vget_high_u8(__p0_554); + __ret_554 = __builtin_bit_cast(uint16x8_t, __noswap_vshll_n_u8(__a1_554, 0)); + return __ret_554; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) uint64x2_t vmovl_high_u32(uint32x4_t __p0_555) { + uint64x2_t __ret_555; + uint32x2_t __a1_555 = vget_high_u32(__p0_555); + __ret_555 = __builtin_bit_cast(uint64x2_t, vshll_n_u32(__a1_555, 0)); + return __ret_555; +} +#else +__ai __attribute__((target("neon"))) uint64x2_t vmovl_high_u32(uint32x4_t __p0_556) { + uint64x2_t __ret_556; + uint32x4_t __rev0_556; __rev0_556 = __builtin_shufflevector(__p0_556, __p0_556, __lane_reverse_128_32); + uint32x2_t __a1_556 = __noswap_vget_high_u32(__rev0_556); + __ret_556 = __builtin_bit_cast(uint64x2_t, __noswap_vshll_n_u32(__a1_556, 0)); + __ret_556 = __builtin_shufflevector(__ret_556, __ret_556, __lane_reverse_128_64); + return __ret_556; +} +__ai __attribute__((target("neon"))) uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_557) { + uint64x2_t __ret_557; + uint32x2_t __a1_557 = __noswap_vget_high_u32(__p0_557); + __ret_557 = __builtin_bit_cast(uint64x2_t, __noswap_vshll_n_u32(__a1_557, 0)); + return __ret_557; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) uint32x4_t vmovl_high_u16(uint16x8_t __p0_558) { + uint32x4_t __ret_558; + uint16x4_t __a1_558 = vget_high_u16(__p0_558); + __ret_558 = __builtin_bit_cast(uint32x4_t, vshll_n_u16(__a1_558, 0)); + return __ret_558; +} +#else +__ai __attribute__((target("neon"))) uint32x4_t vmovl_high_u16(uint16x8_t __p0_559) { + uint32x4_t __ret_559; + uint16x8_t __rev0_559; __rev0_559 = __builtin_shufflevector(__p0_559, __p0_559, __lane_reverse_128_16); + uint16x4_t __a1_559 = __noswap_vget_high_u16(__rev0_559); + __ret_559 = __builtin_bit_cast(uint32x4_t, __noswap_vshll_n_u16(__a1_559, 0)); + __ret_559 = __builtin_shufflevector(__ret_559, __ret_559, __lane_reverse_128_32); + return __ret_559; +} +__ai __attribute__((target("neon"))) uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_560) { + uint32x4_t __ret_560; + uint16x4_t __a1_560 = __noswap_vget_high_u16(__p0_560); + __ret_560 = __builtin_bit_cast(uint32x4_t, __noswap_vshll_n_u16(__a1_560, 0)); + return __ret_560; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) int16x8_t vmovl_high_s8(int8x16_t __p0_561) { + int16x8_t __ret_561; + int8x8_t __a1_561 = vget_high_s8(__p0_561); + __ret_561 = __builtin_bit_cast(int16x8_t, vshll_n_s8(__a1_561, 0)); + return __ret_561; +} 
+#else +__ai __attribute__((target("neon"))) int16x8_t vmovl_high_s8(int8x16_t __p0_562) { + int16x8_t __ret_562; + int8x16_t __rev0_562; __rev0_562 = __builtin_shufflevector(__p0_562, __p0_562, __lane_reverse_128_8); + int8x8_t __a1_562 = __noswap_vget_high_s8(__rev0_562); + __ret_562 = __builtin_bit_cast(int16x8_t, __noswap_vshll_n_s8(__a1_562, 0)); + __ret_562 = __builtin_shufflevector(__ret_562, __ret_562, __lane_reverse_128_16); + return __ret_562; +} +__ai __attribute__((target("neon"))) int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_563) { + int16x8_t __ret_563; + int8x8_t __a1_563 = __noswap_vget_high_s8(__p0_563); + __ret_563 = __builtin_bit_cast(int16x8_t, __noswap_vshll_n_s8(__a1_563, 0)); + return __ret_563; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) int64x2_t vmovl_high_s32(int32x4_t __p0_564) { + int64x2_t __ret_564; + int32x2_t __a1_564 = vget_high_s32(__p0_564); + __ret_564 = __builtin_bit_cast(int64x2_t, vshll_n_s32(__a1_564, 0)); + return __ret_564; +} +#else +__ai __attribute__((target("neon"))) int64x2_t vmovl_high_s32(int32x4_t __p0_565) { + int64x2_t __ret_565; + int32x4_t __rev0_565; __rev0_565 = __builtin_shufflevector(__p0_565, __p0_565, __lane_reverse_128_32); + int32x2_t __a1_565 = __noswap_vget_high_s32(__rev0_565); + __ret_565 = __builtin_bit_cast(int64x2_t, __noswap_vshll_n_s32(__a1_565, 0)); + __ret_565 = __builtin_shufflevector(__ret_565, __ret_565, __lane_reverse_128_64); + return __ret_565; +} +__ai __attribute__((target("neon"))) int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_566) { + int64x2_t __ret_566; + int32x2_t __a1_566 = __noswap_vget_high_s32(__p0_566); + __ret_566 = __builtin_bit_cast(int64x2_t, __noswap_vshll_n_s32(__a1_566, 0)); + return __ret_566; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) int32x4_t vmovl_high_s16(int16x8_t __p0_567) { + int32x4_t __ret_567; + int16x4_t __a1_567 = vget_high_s16(__p0_567); + __ret_567 = __builtin_bit_cast(int32x4_t, vshll_n_s16(__a1_567, 0)); + return __ret_567; +} +#else +__ai __attribute__((target("neon"))) int32x4_t vmovl_high_s16(int16x8_t __p0_568) { + int32x4_t __ret_568; + int16x8_t __rev0_568; __rev0_568 = __builtin_shufflevector(__p0_568, __p0_568, __lane_reverse_128_16); + int16x4_t __a1_568 = __noswap_vget_high_s16(__rev0_568); + __ret_568 = __builtin_bit_cast(int32x4_t, __noswap_vshll_n_s16(__a1_568, 0)); + __ret_568 = __builtin_shufflevector(__ret_568, __ret_568, __lane_reverse_128_32); + return __ret_568; +} +__ai __attribute__((target("neon"))) int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_569) { + int32x4_t __ret_569; + int16x4_t __a1_569 = __noswap_vget_high_s16(__p0_569); + __ret_569 = __builtin_bit_cast(int32x4_t, __noswap_vshll_n_s16(__a1_569, 0)); + return __ret_569; +} #endif #ifdef __LITTLE_ENDIAN__ @@ -54206,10 +55864,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vmovn_high_u32(uint16x4_t __p0, #else __ai __attribute__((target("neon"))) uint16x8_t vmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { uint16x8_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vcombine_u16(__rev0, __noswap_vmovn_u32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -54223,10 +55881,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmovn_high_u64(uint32x2_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vcombine_u32(__rev0, __noswap_vmovn_u64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -54240,10 +55898,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vmovn_high_u16(uint8x8_t __p0, u #else __ai __attribute__((target("neon"))) uint8x16_t vmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vcombine_u8(__rev0, __noswap_vmovn_u16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -54257,10 +55915,10 @@ __ai __attribute__((target("neon"))) int16x8_t vmovn_high_s32(int16x4_t __p0, in #else __ai __attribute__((target("neon"))) int16x8_t vmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { int16x8_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vcombine_s16(__rev0, __noswap_vmovn_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -54274,10 +55932,10 @@ __ai __attribute__((target("neon"))) int32x4_t vmovn_high_s64(int32x2_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { int32x4_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vcombine_s32(__rev0, __noswap_vmovn_s64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -54291,10 +55949,10 @@ __ai __attribute__((target("neon"))) int8x16_t vmovn_high_s16(int8x8_t __p0, int #else __ai __attribute__((target("neon"))) int8x16_t vmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { int8x16_t __ret; - int8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vcombine_s8(__rev0, __noswap_vmovn_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -54308,10 +55966,10 @@ __ai __attribute__((target("neon"))) float64x2_t vmulq_f64(float64x2_t __p0, flo #else __ai __attribute__((target("neon"))) float64x2_t vmulq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 * __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -54321,29 +55979,29 @@ __ai __attribute__((target("neon"))) float64x1_t vmul_f64(float64x1_t __p0, floa __ret = __p0 * __p1; return __ret; } -#define vmuld_lane_f64(__p0_554, __p1_554, __p2_554) __extension__ ({ \ - float64_t __ret_554; \ - float64_t __s0_554 = __p0_554; \ - float64x1_t __s1_554 = __p1_554; \ - __ret_554 = __s0_554 * vget_lane_f64(__s1_554, __p2_554); \ - __ret_554; \ +#define vmuld_lane_f64(__p0_570, __p1_570, __p2_570) __extension__ ({ \ + float64_t __ret_570; \ + float64_t __s0_570 = __p0_570; \ + float64x1_t __s1_570 = __p1_570; \ + __ret_570 = __s0_570 * vget_lane_f64(__s1_570, __p2_570); \ + __ret_570; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmuls_lane_f32(__p0_555, __p1_555, __p2_555) __extension__ ({ \ - float32_t __ret_555; \ - float32_t __s0_555 = __p0_555; \ - float32x2_t __s1_555 = __p1_555; \ - __ret_555 = __s0_555 * vget_lane_f32(__s1_555, __p2_555); \ - __ret_555; \ +#define vmuls_lane_f32(__p0_571, __p1_571, __p2_571) __extension__ ({ \ + float32_t __ret_571; \ + float32_t __s0_571 = __p0_571; \ + float32x2_t __s1_571 = __p1_571; \ + __ret_571 = __s0_571 * vget_lane_f32(__s1_571, __p2_571); \ + __ret_571; \ }) #else -#define vmuls_lane_f32(__p0_556, __p1_556, __p2_556) __extension__ ({ \ - float32_t __ret_556; \ - float32_t __s0_556 = __p0_556; \ - float32x2_t __s1_556 = __p1_556; \ - float32x2_t __rev1_556; __rev1_556 = __builtin_shufflevector(__s1_556, __s1_556, 1, 0); \ - __ret_556 = __s0_556 * __noswap_vget_lane_f32(__rev1_556, __p2_556); \ - __ret_556; \ +#define vmuls_lane_f32(__p0_572, __p1_572, __p2_572) __extension__ ({ \ + float32_t __ret_572; \ + float32_t __s0_572 = __p0_572; \ + float32x2_t __s1_572 = __p1_572; \ + float32x2_t __rev1_572; __rev1_572 = __builtin_shufflevector(__s1_572, __s1_572, __lane_reverse_64_32); \ + __ret_572 = __s0_572 * __noswap_vget_lane_f32(__rev1_572, __p2_572); \ + __ret_572; \ }) #endif @@ -54351,320 +56009,320 @@ __ai __attribute__((target("neon"))) float64x1_t vmul_f64(float64x1_t __p0, floa float64x1_t __ret; \ float64x1_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ - __ret = (float64x1_t) __builtin_neon_vmul_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 10); \ + __ret = __builtin_bit_cast(float64x1_t, 
__builtin_neon_vmul_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 10)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmulq_lane_f64(__p0_557, __p1_557, __p2_557) __extension__ ({ \ - float64x2_t __ret_557; \ - float64x2_t __s0_557 = __p0_557; \ - float64x1_t __s1_557 = __p1_557; \ - __ret_557 = __s0_557 * splatq_lane_f64(__s1_557, __p2_557); \ - __ret_557; \ -}) -#else -#define vmulq_lane_f64(__p0_558, __p1_558, __p2_558) __extension__ ({ \ - float64x2_t __ret_558; \ - float64x2_t __s0_558 = __p0_558; \ - float64x1_t __s1_558 = __p1_558; \ - float64x2_t __rev0_558; __rev0_558 = __builtin_shufflevector(__s0_558, __s0_558, 1, 0); \ - __ret_558 = __rev0_558 * __noswap_splatq_lane_f64(__s1_558, __p2_558); \ - __ret_558 = __builtin_shufflevector(__ret_558, __ret_558, 1, 0); \ - __ret_558; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmuld_laneq_f64(__p0_559, __p1_559, __p2_559) __extension__ ({ \ - float64_t __ret_559; \ - float64_t __s0_559 = __p0_559; \ - float64x2_t __s1_559 = __p1_559; \ - __ret_559 = __s0_559 * vgetq_lane_f64(__s1_559, __p2_559); \ - __ret_559; \ -}) -#else -#define vmuld_laneq_f64(__p0_560, __p1_560, __p2_560) __extension__ ({ \ - float64_t __ret_560; \ - float64_t __s0_560 = __p0_560; \ - float64x2_t __s1_560 = __p1_560; \ - float64x2_t __rev1_560; __rev1_560 = __builtin_shufflevector(__s1_560, __s1_560, 1, 0); \ - __ret_560 = __s0_560 * __noswap_vgetq_lane_f64(__rev1_560, __p2_560); \ - __ret_560; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmuls_laneq_f32(__p0_561, __p1_561, __p2_561) __extension__ ({ \ - float32_t __ret_561; \ - float32_t __s0_561 = __p0_561; \ - float32x4_t __s1_561 = __p1_561; \ - __ret_561 = __s0_561 * vgetq_lane_f32(__s1_561, __p2_561); \ - __ret_561; \ -}) -#else -#define vmuls_laneq_f32(__p0_562, __p1_562, __p2_562) __extension__ ({ \ - float32_t __ret_562; \ - float32_t __s0_562 = __p0_562; \ - float32x4_t __s1_562 = __p1_562; \ - float32x4_t __rev1_562; __rev1_562 = __builtin_shufflevector(__s1_562, __s1_562, 3, 2, 1, 0); \ - __ret_562 = __s0_562 * __noswap_vgetq_lane_f32(__rev1_562, __p2_562); \ - __ret_562; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __ret; \ - float64x1_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 10); \ - __ret; \ -}) -#else -#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ - float64x1_t __ret; \ - float64x1_t __s0 = __p0; \ - float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float64x1_t) __builtin_neon_vmul_laneq_v((int8x8_t)__s0, (int8x16_t)__rev1, __p2, 10); \ - __ret; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_u32(__p0_563, __p1_563, __p2_563) __extension__ ({ \ - uint32x4_t __ret_563; \ - uint32x4_t __s0_563 = __p0_563; \ - uint32x4_t __s1_563 = __p1_563; \ - __ret_563 = __s0_563 * splatq_laneq_u32(__s1_563, __p2_563); \ - __ret_563; \ -}) -#else -#define vmulq_laneq_u32(__p0_564, __p1_564, __p2_564) __extension__ ({ \ - uint32x4_t __ret_564; \ - uint32x4_t __s0_564 = __p0_564; \ - uint32x4_t __s1_564 = __p1_564; \ - uint32x4_t __rev0_564; __rev0_564 = __builtin_shufflevector(__s0_564, __s0_564, 3, 2, 1, 0); \ - uint32x4_t __rev1_564; __rev1_564 = __builtin_shufflevector(__s1_564, __s1_564, 3, 2, 1, 0); \ - __ret_564 = __rev0_564 * __noswap_splatq_laneq_u32(__rev1_564, __p2_564); \ - __ret_564 = 
__builtin_shufflevector(__ret_564, __ret_564, 3, 2, 1, 0); \ - __ret_564; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_u16(__p0_565, __p1_565, __p2_565) __extension__ ({ \ - uint16x8_t __ret_565; \ - uint16x8_t __s0_565 = __p0_565; \ - uint16x8_t __s1_565 = __p1_565; \ - __ret_565 = __s0_565 * splatq_laneq_u16(__s1_565, __p2_565); \ - __ret_565; \ -}) -#else -#define vmulq_laneq_u16(__p0_566, __p1_566, __p2_566) __extension__ ({ \ - uint16x8_t __ret_566; \ - uint16x8_t __s0_566 = __p0_566; \ - uint16x8_t __s1_566 = __p1_566; \ - uint16x8_t __rev0_566; __rev0_566 = __builtin_shufflevector(__s0_566, __s0_566, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_566; __rev1_566 = __builtin_shufflevector(__s1_566, __s1_566, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_566 = __rev0_566 * __noswap_splatq_laneq_u16(__rev1_566, __p2_566); \ - __ret_566 = __builtin_shufflevector(__ret_566, __ret_566, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_566; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f64(__p0_567, __p1_567, __p2_567) __extension__ ({ \ - float64x2_t __ret_567; \ - float64x2_t __s0_567 = __p0_567; \ - float64x2_t __s1_567 = __p1_567; \ - __ret_567 = __s0_567 * splatq_laneq_f64(__s1_567, __p2_567); \ - __ret_567; \ -}) -#else -#define vmulq_laneq_f64(__p0_568, __p1_568, __p2_568) __extension__ ({ \ - float64x2_t __ret_568; \ - float64x2_t __s0_568 = __p0_568; \ - float64x2_t __s1_568 = __p1_568; \ - float64x2_t __rev0_568; __rev0_568 = __builtin_shufflevector(__s0_568, __s0_568, 1, 0); \ - float64x2_t __rev1_568; __rev1_568 = __builtin_shufflevector(__s1_568, __s1_568, 1, 0); \ - __ret_568 = __rev0_568 * __noswap_splatq_laneq_f64(__rev1_568, __p2_568); \ - __ret_568 = __builtin_shufflevector(__ret_568, __ret_568, 1, 0); \ - __ret_568; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f32(__p0_569, __p1_569, __p2_569) __extension__ ({ \ - float32x4_t __ret_569; \ - float32x4_t __s0_569 = __p0_569; \ - float32x4_t __s1_569 = __p1_569; \ - __ret_569 = __s0_569 * splatq_laneq_f32(__s1_569, __p2_569); \ - __ret_569; \ -}) -#else -#define vmulq_laneq_f32(__p0_570, __p1_570, __p2_570) __extension__ ({ \ - float32x4_t __ret_570; \ - float32x4_t __s0_570 = __p0_570; \ - float32x4_t __s1_570 = __p1_570; \ - float32x4_t __rev0_570; __rev0_570 = __builtin_shufflevector(__s0_570, __s0_570, 3, 2, 1, 0); \ - float32x4_t __rev1_570; __rev1_570 = __builtin_shufflevector(__s1_570, __s1_570, 3, 2, 1, 0); \ - __ret_570 = __rev0_570 * __noswap_splatq_laneq_f32(__rev1_570, __p2_570); \ - __ret_570 = __builtin_shufflevector(__ret_570, __ret_570, 3, 2, 1, 0); \ - __ret_570; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_s32(__p0_571, __p1_571, __p2_571) __extension__ ({ \ - int32x4_t __ret_571; \ - int32x4_t __s0_571 = __p0_571; \ - int32x4_t __s1_571 = __p1_571; \ - __ret_571 = __s0_571 * splatq_laneq_s32(__s1_571, __p2_571); \ - __ret_571; \ -}) -#else -#define vmulq_laneq_s32(__p0_572, __p1_572, __p2_572) __extension__ ({ \ - int32x4_t __ret_572; \ - int32x4_t __s0_572 = __p0_572; \ - int32x4_t __s1_572 = __p1_572; \ - int32x4_t __rev0_572; __rev0_572 = __builtin_shufflevector(__s0_572, __s0_572, 3, 2, 1, 0); \ - int32x4_t __rev1_572; __rev1_572 = __builtin_shufflevector(__s1_572, __s1_572, 3, 2, 1, 0); \ - __ret_572 = __rev0_572 * __noswap_splatq_laneq_s32(__rev1_572, __p2_572); \ - __ret_572 = __builtin_shufflevector(__ret_572, __ret_572, 3, 2, 1, 0); \ - __ret_572; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_s16(__p0_573, __p1_573, __p2_573) 
__extension__ ({ \ - int16x8_t __ret_573; \ - int16x8_t __s0_573 = __p0_573; \ - int16x8_t __s1_573 = __p1_573; \ - __ret_573 = __s0_573 * splatq_laneq_s16(__s1_573, __p2_573); \ +#define vmulq_lane_f64(__p0_573, __p1_573, __p2_573) __extension__ ({ \ + float64x2_t __ret_573; \ + float64x2_t __s0_573 = __p0_573; \ + float64x1_t __s1_573 = __p1_573; \ + __ret_573 = __s0_573 * splatq_lane_f64(__s1_573, __p2_573); \ __ret_573; \ }) #else -#define vmulq_laneq_s16(__p0_574, __p1_574, __p2_574) __extension__ ({ \ - int16x8_t __ret_574; \ - int16x8_t __s0_574 = __p0_574; \ - int16x8_t __s1_574 = __p1_574; \ - int16x8_t __rev0_574; __rev0_574 = __builtin_shufflevector(__s0_574, __s0_574, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_574; __rev1_574 = __builtin_shufflevector(__s1_574, __s1_574, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_574 = __rev0_574 * __noswap_splatq_laneq_s16(__rev1_574, __p2_574); \ - __ret_574 = __builtin_shufflevector(__ret_574, __ret_574, 7, 6, 5, 4, 3, 2, 1, 0); \ +#define vmulq_lane_f64(__p0_574, __p1_574, __p2_574) __extension__ ({ \ + float64x2_t __ret_574; \ + float64x2_t __s0_574 = __p0_574; \ + float64x1_t __s1_574 = __p1_574; \ + float64x2_t __rev0_574; __rev0_574 = __builtin_shufflevector(__s0_574, __s0_574, __lane_reverse_128_64); \ + __ret_574 = __rev0_574 * __noswap_splatq_lane_f64(__s1_574, __p2_574); \ + __ret_574 = __builtin_shufflevector(__ret_574, __ret_574, __lane_reverse_128_64); \ __ret_574; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_u32(__p0_575, __p1_575, __p2_575) __extension__ ({ \ - uint32x2_t __ret_575; \ - uint32x2_t __s0_575 = __p0_575; \ - uint32x4_t __s1_575 = __p1_575; \ - __ret_575 = __s0_575 * splat_laneq_u32(__s1_575, __p2_575); \ +#define vmuld_laneq_f64(__p0_575, __p1_575, __p2_575) __extension__ ({ \ + float64_t __ret_575; \ + float64_t __s0_575 = __p0_575; \ + float64x2_t __s1_575 = __p1_575; \ + __ret_575 = __s0_575 * vgetq_lane_f64(__s1_575, __p2_575); \ __ret_575; \ }) #else -#define vmul_laneq_u32(__p0_576, __p1_576, __p2_576) __extension__ ({ \ - uint32x2_t __ret_576; \ - uint32x2_t __s0_576 = __p0_576; \ - uint32x4_t __s1_576 = __p1_576; \ - uint32x2_t __rev0_576; __rev0_576 = __builtin_shufflevector(__s0_576, __s0_576, 1, 0); \ - uint32x4_t __rev1_576; __rev1_576 = __builtin_shufflevector(__s1_576, __s1_576, 3, 2, 1, 0); \ - __ret_576 = __rev0_576 * __noswap_splat_laneq_u32(__rev1_576, __p2_576); \ - __ret_576 = __builtin_shufflevector(__ret_576, __ret_576, 1, 0); \ +#define vmuld_laneq_f64(__p0_576, __p1_576, __p2_576) __extension__ ({ \ + float64_t __ret_576; \ + float64_t __s0_576 = __p0_576; \ + float64x2_t __s1_576 = __p1_576; \ + float64x2_t __rev1_576; __rev1_576 = __builtin_shufflevector(__s1_576, __s1_576, __lane_reverse_128_64); \ + __ret_576 = __s0_576 * __noswap_vgetq_lane_f64(__rev1_576, __p2_576); \ __ret_576; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_u16(__p0_577, __p1_577, __p2_577) __extension__ ({ \ - uint16x4_t __ret_577; \ - uint16x4_t __s0_577 = __p0_577; \ - uint16x8_t __s1_577 = __p1_577; \ - __ret_577 = __s0_577 * splat_laneq_u16(__s1_577, __p2_577); \ +#define vmuls_laneq_f32(__p0_577, __p1_577, __p2_577) __extension__ ({ \ + float32_t __ret_577; \ + float32_t __s0_577 = __p0_577; \ + float32x4_t __s1_577 = __p1_577; \ + __ret_577 = __s0_577 * vgetq_lane_f32(__s1_577, __p2_577); \ __ret_577; \ }) #else -#define vmul_laneq_u16(__p0_578, __p1_578, __p2_578) __extension__ ({ \ - uint16x4_t __ret_578; \ - uint16x4_t __s0_578 = __p0_578; \ - uint16x8_t __s1_578 = __p1_578; \ - uint16x4_t 
__rev0_578; __rev0_578 = __builtin_shufflevector(__s0_578, __s0_578, 3, 2, 1, 0); \ - uint16x8_t __rev1_578; __rev1_578 = __builtin_shufflevector(__s1_578, __s1_578, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_578 = __rev0_578 * __noswap_splat_laneq_u16(__rev1_578, __p2_578); \ - __ret_578 = __builtin_shufflevector(__ret_578, __ret_578, 3, 2, 1, 0); \ +#define vmuls_laneq_f32(__p0_578, __p1_578, __p2_578) __extension__ ({ \ + float32_t __ret_578; \ + float32_t __s0_578 = __p0_578; \ + float32x4_t __s1_578 = __p1_578; \ + float32x4_t __rev1_578; __rev1_578 = __builtin_shufflevector(__s1_578, __s1_578, __lane_reverse_128_32); \ + __ret_578 = __s0_578 * __noswap_vgetq_lane_f32(__rev1_578, __p2_578); \ __ret_578; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_f32(__p0_579, __p1_579, __p2_579) __extension__ ({ \ - float32x2_t __ret_579; \ - float32x2_t __s0_579 = __p0_579; \ - float32x4_t __s1_579 = __p1_579; \ - __ret_579 = __s0_579 * splat_laneq_f32(__s1_579, __p2_579); \ +#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __ret; \ + float64x1_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vmul_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 10)); \ + __ret; \ +}) +#else +#define vmul_laneq_f64(__p0, __p1, __p2) __extension__ ({ \ + float64x1_t __ret; \ + float64x1_t __s0 = __p0; \ + float64x2_t __s1 = __p1; \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vmul_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 10)); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulq_laneq_u32(__p0_579, __p1_579, __p2_579) __extension__ ({ \ + uint32x4_t __ret_579; \ + uint32x4_t __s0_579 = __p0_579; \ + uint32x4_t __s1_579 = __p1_579; \ + __ret_579 = __s0_579 * splatq_laneq_u32(__s1_579, __p2_579); \ __ret_579; \ }) #else -#define vmul_laneq_f32(__p0_580, __p1_580, __p2_580) __extension__ ({ \ - float32x2_t __ret_580; \ - float32x2_t __s0_580 = __p0_580; \ - float32x4_t __s1_580 = __p1_580; \ - float32x2_t __rev0_580; __rev0_580 = __builtin_shufflevector(__s0_580, __s0_580, 1, 0); \ - float32x4_t __rev1_580; __rev1_580 = __builtin_shufflevector(__s1_580, __s1_580, 3, 2, 1, 0); \ - __ret_580 = __rev0_580 * __noswap_splat_laneq_f32(__rev1_580, __p2_580); \ - __ret_580 = __builtin_shufflevector(__ret_580, __ret_580, 1, 0); \ +#define vmulq_laneq_u32(__p0_580, __p1_580, __p2_580) __extension__ ({ \ + uint32x4_t __ret_580; \ + uint32x4_t __s0_580 = __p0_580; \ + uint32x4_t __s1_580 = __p1_580; \ + uint32x4_t __rev0_580; __rev0_580 = __builtin_shufflevector(__s0_580, __s0_580, __lane_reverse_128_32); \ + uint32x4_t __rev1_580; __rev1_580 = __builtin_shufflevector(__s1_580, __s1_580, __lane_reverse_128_32); \ + __ret_580 = __rev0_580 * __noswap_splatq_laneq_u32(__rev1_580, __p2_580); \ + __ret_580 = __builtin_shufflevector(__ret_580, __ret_580, __lane_reverse_128_32); \ __ret_580; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_s32(__p0_581, __p1_581, __p2_581) __extension__ ({ \ - int32x2_t __ret_581; \ - int32x2_t __s0_581 = __p0_581; \ - int32x4_t __s1_581 = __p1_581; \ - __ret_581 = __s0_581 * splat_laneq_s32(__s1_581, __p2_581); \ +#define vmulq_laneq_u16(__p0_581, __p1_581, __p2_581) __extension__ ({ \ + uint16x8_t __ret_581; \ + uint16x8_t __s0_581 = __p0_581; \ + uint16x8_t __s1_581 = __p1_581; \ + __ret_581 = __s0_581 * 
splatq_laneq_u16(__s1_581, __p2_581); \ __ret_581; \ }) #else -#define vmul_laneq_s32(__p0_582, __p1_582, __p2_582) __extension__ ({ \ - int32x2_t __ret_582; \ - int32x2_t __s0_582 = __p0_582; \ - int32x4_t __s1_582 = __p1_582; \ - int32x2_t __rev0_582; __rev0_582 = __builtin_shufflevector(__s0_582, __s0_582, 1, 0); \ - int32x4_t __rev1_582; __rev1_582 = __builtin_shufflevector(__s1_582, __s1_582, 3, 2, 1, 0); \ - __ret_582 = __rev0_582 * __noswap_splat_laneq_s32(__rev1_582, __p2_582); \ - __ret_582 = __builtin_shufflevector(__ret_582, __ret_582, 1, 0); \ +#define vmulq_laneq_u16(__p0_582, __p1_582, __p2_582) __extension__ ({ \ + uint16x8_t __ret_582; \ + uint16x8_t __s0_582 = __p0_582; \ + uint16x8_t __s1_582 = __p1_582; \ + uint16x8_t __rev0_582; __rev0_582 = __builtin_shufflevector(__s0_582, __s0_582, __lane_reverse_128_16); \ + uint16x8_t __rev1_582; __rev1_582 = __builtin_shufflevector(__s1_582, __s1_582, __lane_reverse_128_16); \ + __ret_582 = __rev0_582 * __noswap_splatq_laneq_u16(__rev1_582, __p2_582); \ + __ret_582 = __builtin_shufflevector(__ret_582, __ret_582, __lane_reverse_128_16); \ __ret_582; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_s16(__p0_583, __p1_583, __p2_583) __extension__ ({ \ - int16x4_t __ret_583; \ - int16x4_t __s0_583 = __p0_583; \ - int16x8_t __s1_583 = __p1_583; \ - __ret_583 = __s0_583 * splat_laneq_s16(__s1_583, __p2_583); \ +#define vmulq_laneq_f64(__p0_583, __p1_583, __p2_583) __extension__ ({ \ + float64x2_t __ret_583; \ + float64x2_t __s0_583 = __p0_583; \ + float64x2_t __s1_583 = __p1_583; \ + __ret_583 = __s0_583 * splatq_laneq_f64(__s1_583, __p2_583); \ __ret_583; \ }) #else -#define vmul_laneq_s16(__p0_584, __p1_584, __p2_584) __extension__ ({ \ - int16x4_t __ret_584; \ - int16x4_t __s0_584 = __p0_584; \ - int16x8_t __s1_584 = __p1_584; \ - int16x4_t __rev0_584; __rev0_584 = __builtin_shufflevector(__s0_584, __s0_584, 3, 2, 1, 0); \ - int16x8_t __rev1_584; __rev1_584 = __builtin_shufflevector(__s1_584, __s1_584, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_584 = __rev0_584 * __noswap_splat_laneq_s16(__rev1_584, __p2_584); \ - __ret_584 = __builtin_shufflevector(__ret_584, __ret_584, 3, 2, 1, 0); \ +#define vmulq_laneq_f64(__p0_584, __p1_584, __p2_584) __extension__ ({ \ + float64x2_t __ret_584; \ + float64x2_t __s0_584 = __p0_584; \ + float64x2_t __s1_584 = __p1_584; \ + float64x2_t __rev0_584; __rev0_584 = __builtin_shufflevector(__s0_584, __s0_584, __lane_reverse_128_64); \ + float64x2_t __rev1_584; __rev1_584 = __builtin_shufflevector(__s1_584, __s1_584, __lane_reverse_128_64); \ + __ret_584 = __rev0_584 * __noswap_splatq_laneq_f64(__rev1_584, __p2_584); \ + __ret_584 = __builtin_shufflevector(__ret_584, __ret_584, __lane_reverse_128_64); \ __ret_584; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vmulq_laneq_f32(__p0_585, __p1_585, __p2_585) __extension__ ({ \ + float32x4_t __ret_585; \ + float32x4_t __s0_585 = __p0_585; \ + float32x4_t __s1_585 = __p1_585; \ + __ret_585 = __s0_585 * splatq_laneq_f32(__s1_585, __p2_585); \ + __ret_585; \ +}) +#else +#define vmulq_laneq_f32(__p0_586, __p1_586, __p2_586) __extension__ ({ \ + float32x4_t __ret_586; \ + float32x4_t __s0_586 = __p0_586; \ + float32x4_t __s1_586 = __p1_586; \ + float32x4_t __rev0_586; __rev0_586 = __builtin_shufflevector(__s0_586, __s0_586, __lane_reverse_128_32); \ + float32x4_t __rev1_586; __rev1_586 = __builtin_shufflevector(__s1_586, __s1_586, __lane_reverse_128_32); \ + __ret_586 = __rev0_586 * __noswap_splatq_laneq_f32(__rev1_586, __p2_586); \ + __ret_586 = 
__builtin_shufflevector(__ret_586, __ret_586, __lane_reverse_128_32); \ + __ret_586; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulq_laneq_s32(__p0_587, __p1_587, __p2_587) __extension__ ({ \ + int32x4_t __ret_587; \ + int32x4_t __s0_587 = __p0_587; \ + int32x4_t __s1_587 = __p1_587; \ + __ret_587 = __s0_587 * splatq_laneq_s32(__s1_587, __p2_587); \ + __ret_587; \ +}) +#else +#define vmulq_laneq_s32(__p0_588, __p1_588, __p2_588) __extension__ ({ \ + int32x4_t __ret_588; \ + int32x4_t __s0_588 = __p0_588; \ + int32x4_t __s1_588 = __p1_588; \ + int32x4_t __rev0_588; __rev0_588 = __builtin_shufflevector(__s0_588, __s0_588, __lane_reverse_128_32); \ + int32x4_t __rev1_588; __rev1_588 = __builtin_shufflevector(__s1_588, __s1_588, __lane_reverse_128_32); \ + __ret_588 = __rev0_588 * __noswap_splatq_laneq_s32(__rev1_588, __p2_588); \ + __ret_588 = __builtin_shufflevector(__ret_588, __ret_588, __lane_reverse_128_32); \ + __ret_588; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulq_laneq_s16(__p0_589, __p1_589, __p2_589) __extension__ ({ \ + int16x8_t __ret_589; \ + int16x8_t __s0_589 = __p0_589; \ + int16x8_t __s1_589 = __p1_589; \ + __ret_589 = __s0_589 * splatq_laneq_s16(__s1_589, __p2_589); \ + __ret_589; \ +}) +#else +#define vmulq_laneq_s16(__p0_590, __p1_590, __p2_590) __extension__ ({ \ + int16x8_t __ret_590; \ + int16x8_t __s0_590 = __p0_590; \ + int16x8_t __s1_590 = __p1_590; \ + int16x8_t __rev0_590; __rev0_590 = __builtin_shufflevector(__s0_590, __s0_590, __lane_reverse_128_16); \ + int16x8_t __rev1_590; __rev1_590 = __builtin_shufflevector(__s1_590, __s1_590, __lane_reverse_128_16); \ + __ret_590 = __rev0_590 * __noswap_splatq_laneq_s16(__rev1_590, __p2_590); \ + __ret_590 = __builtin_shufflevector(__ret_590, __ret_590, __lane_reverse_128_16); \ + __ret_590; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_u32(__p0_591, __p1_591, __p2_591) __extension__ ({ \ + uint32x2_t __ret_591; \ + uint32x2_t __s0_591 = __p0_591; \ + uint32x4_t __s1_591 = __p1_591; \ + __ret_591 = __s0_591 * splat_laneq_u32(__s1_591, __p2_591); \ + __ret_591; \ +}) +#else +#define vmul_laneq_u32(__p0_592, __p1_592, __p2_592) __extension__ ({ \ + uint32x2_t __ret_592; \ + uint32x2_t __s0_592 = __p0_592; \ + uint32x4_t __s1_592 = __p1_592; \ + uint32x2_t __rev0_592; __rev0_592 = __builtin_shufflevector(__s0_592, __s0_592, __lane_reverse_64_32); \ + uint32x4_t __rev1_592; __rev1_592 = __builtin_shufflevector(__s1_592, __s1_592, __lane_reverse_128_32); \ + __ret_592 = __rev0_592 * __noswap_splat_laneq_u32(__rev1_592, __p2_592); \ + __ret_592 = __builtin_shufflevector(__ret_592, __ret_592, __lane_reverse_64_32); \ + __ret_592; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_u16(__p0_593, __p1_593, __p2_593) __extension__ ({ \ + uint16x4_t __ret_593; \ + uint16x4_t __s0_593 = __p0_593; \ + uint16x8_t __s1_593 = __p1_593; \ + __ret_593 = __s0_593 * splat_laneq_u16(__s1_593, __p2_593); \ + __ret_593; \ +}) +#else +#define vmul_laneq_u16(__p0_594, __p1_594, __p2_594) __extension__ ({ \ + uint16x4_t __ret_594; \ + uint16x4_t __s0_594 = __p0_594; \ + uint16x8_t __s1_594 = __p1_594; \ + uint16x4_t __rev0_594; __rev0_594 = __builtin_shufflevector(__s0_594, __s0_594, __lane_reverse_64_16); \ + uint16x8_t __rev1_594; __rev1_594 = __builtin_shufflevector(__s1_594, __s1_594, __lane_reverse_128_16); \ + __ret_594 = __rev0_594 * __noswap_splat_laneq_u16(__rev1_594, __p2_594); \ + __ret_594 = __builtin_shufflevector(__ret_594, __ret_594, __lane_reverse_64_16); \ + __ret_594; \ +}) +#endif + 
+#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_f32(__p0_595, __p1_595, __p2_595) __extension__ ({ \ + float32x2_t __ret_595; \ + float32x2_t __s0_595 = __p0_595; \ + float32x4_t __s1_595 = __p1_595; \ + __ret_595 = __s0_595 * splat_laneq_f32(__s1_595, __p2_595); \ + __ret_595; \ +}) +#else +#define vmul_laneq_f32(__p0_596, __p1_596, __p2_596) __extension__ ({ \ + float32x2_t __ret_596; \ + float32x2_t __s0_596 = __p0_596; \ + float32x4_t __s1_596 = __p1_596; \ + float32x2_t __rev0_596; __rev0_596 = __builtin_shufflevector(__s0_596, __s0_596, __lane_reverse_64_32); \ + float32x4_t __rev1_596; __rev1_596 = __builtin_shufflevector(__s1_596, __s1_596, __lane_reverse_128_32); \ + __ret_596 = __rev0_596 * __noswap_splat_laneq_f32(__rev1_596, __p2_596); \ + __ret_596 = __builtin_shufflevector(__ret_596, __ret_596, __lane_reverse_64_32); \ + __ret_596; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_s32(__p0_597, __p1_597, __p2_597) __extension__ ({ \ + int32x2_t __ret_597; \ + int32x2_t __s0_597 = __p0_597; \ + int32x4_t __s1_597 = __p1_597; \ + __ret_597 = __s0_597 * splat_laneq_s32(__s1_597, __p2_597); \ + __ret_597; \ +}) +#else +#define vmul_laneq_s32(__p0_598, __p1_598, __p2_598) __extension__ ({ \ + int32x2_t __ret_598; \ + int32x2_t __s0_598 = __p0_598; \ + int32x4_t __s1_598 = __p1_598; \ + int32x2_t __rev0_598; __rev0_598 = __builtin_shufflevector(__s0_598, __s0_598, __lane_reverse_64_32); \ + int32x4_t __rev1_598; __rev1_598 = __builtin_shufflevector(__s1_598, __s1_598, __lane_reverse_128_32); \ + __ret_598 = __rev0_598 * __noswap_splat_laneq_s32(__rev1_598, __p2_598); \ + __ret_598 = __builtin_shufflevector(__ret_598, __ret_598, __lane_reverse_64_32); \ + __ret_598; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_laneq_s16(__p0_599, __p1_599, __p2_599) __extension__ ({ \ + int16x4_t __ret_599; \ + int16x4_t __s0_599 = __p0_599; \ + int16x8_t __s1_599 = __p1_599; \ + __ret_599 = __s0_599 * splat_laneq_s16(__s1_599, __p2_599); \ + __ret_599; \ +}) +#else +#define vmul_laneq_s16(__p0_600, __p1_600, __p2_600) __extension__ ({ \ + int16x4_t __ret_600; \ + int16x4_t __s0_600 = __p0_600; \ + int16x8_t __s1_600 = __p1_600; \ + int16x4_t __rev0_600; __rev0_600 = __builtin_shufflevector(__s0_600, __s0_600, __lane_reverse_64_16); \ + int16x8_t __rev1_600; __rev1_600 = __builtin_shufflevector(__s1_600, __s1_600, __lane_reverse_128_16); \ + __ret_600 = __rev0_600 * __noswap_splat_laneq_s16(__rev1_600, __p2_600); \ + __ret_600 = __builtin_shufflevector(__ret_600, __ret_600, __lane_reverse_64_16); \ + __ret_600; \ +}) +#endif + __ai __attribute__((target("neon"))) float64x1_t vmul_n_f64(float64x1_t __p0, float64_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmul_n_f64((float64x1_t)__p0, __p1); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vmul_n_f64(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -54676,9 +56334,9 @@ __ai __attribute__((target("neon"))) float64x2_t vmulq_n_f64(float64x2_t __p0, f #else __ai __attribute__((target("neon"))) float64x2_t vmulq_n_f64(float64x2_t __p0, float64_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = __rev0 * (float64x2_t) {__p1, __p1}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -54692,10 +56350,10 @@ __ai __attribute__((target("neon"))) 
poly16x8_t vmull_high_p8(poly8x16_t __p0, p #else __ai __attribute__((target("neon"))) poly16x8_t vmull_high_p8(poly8x16_t __p0, poly8x16_t __p1) { poly16x8_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vmull_p8(__noswap_vget_high_p8(__rev0), __noswap_vget_high_p8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -54709,10 +56367,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vmull_high_u8(uint8x16_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vmull_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vmull_u8(__noswap_vget_high_u8(__rev0), __noswap_vget_high_u8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -54726,10 +56384,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vmull_high_u32(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vmull_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0), __noswap_vget_high_u32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -54743,10 +56401,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmull_high_u16(uint16x8_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vmull_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0), __noswap_vget_high_u16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -54760,10 +56418,10 @@ __ai __attribute__((target("neon"))) int16x8_t vmull_high_s8(int8x16_t __p0, int #else __ai __attribute__((target("neon"))) 
int16x8_t vmull_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vmull_s8(__noswap_vget_high_s8(__rev0), __noswap_vget_high_s8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -54777,10 +56435,10 @@ __ai __attribute__((target("neon"))) int64x2_t vmull_high_s32(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vmull_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -54794,179 +56452,179 @@ __ai __attribute__((target("neon"))) int32x4_t vmull_high_s16(int16x8_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vmull_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_u32(__p0_585, __p1_585, __p2_585) __extension__ ({ \ - uint64x2_t __ret_585; \ - uint32x4_t __s0_585 = __p0_585; \ - uint32x2_t __s1_585 = __p1_585; \ - __ret_585 = vmull_u32(vget_high_u32(__s0_585), splat_lane_u32(__s1_585, __p2_585)); \ - __ret_585; \ +#define vmull_high_lane_u32(__p0_601, __p1_601, __p2_601) __extension__ ({ \ + uint64x2_t __ret_601; \ + uint32x4_t __s0_601 = __p0_601; \ + uint32x2_t __s1_601 = __p1_601; \ + __ret_601 = vmull_u32(vget_high_u32(__s0_601), splat_lane_u32(__s1_601, __p2_601)); \ + __ret_601; \ }) #else -#define vmull_high_lane_u32(__p0_586, __p1_586, __p2_586) __extension__ ({ \ - uint64x2_t __ret_586; \ - uint32x4_t __s0_586 = __p0_586; \ - uint32x2_t __s1_586 = __p1_586; \ - uint32x4_t __rev0_586; __rev0_586 = __builtin_shufflevector(__s0_586, __s0_586, 3, 2, 1, 0); \ - uint32x2_t __rev1_586; __rev1_586 = __builtin_shufflevector(__s1_586, __s1_586, 1, 0); \ - __ret_586 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_586), __noswap_splat_lane_u32(__rev1_586, __p2_586)); \ - __ret_586 = __builtin_shufflevector(__ret_586, __ret_586, 1, 0); \ - __ret_586; \ +#define 
vmull_high_lane_u32(__p0_602, __p1_602, __p2_602) __extension__ ({ \ + uint64x2_t __ret_602; \ + uint32x4_t __s0_602 = __p0_602; \ + uint32x2_t __s1_602 = __p1_602; \ + uint32x4_t __rev0_602; __rev0_602 = __builtin_shufflevector(__s0_602, __s0_602, __lane_reverse_128_32); \ + uint32x2_t __rev1_602; __rev1_602 = __builtin_shufflevector(__s1_602, __s1_602, __lane_reverse_64_32); \ + __ret_602 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_602), __noswap_splat_lane_u32(__rev1_602, __p2_602)); \ + __ret_602 = __builtin_shufflevector(__ret_602, __ret_602, __lane_reverse_128_64); \ + __ret_602; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_u16(__p0_587, __p1_587, __p2_587) __extension__ ({ \ - uint32x4_t __ret_587; \ - uint16x8_t __s0_587 = __p0_587; \ - uint16x4_t __s1_587 = __p1_587; \ - __ret_587 = vmull_u16(vget_high_u16(__s0_587), splat_lane_u16(__s1_587, __p2_587)); \ - __ret_587; \ +#define vmull_high_lane_u16(__p0_603, __p1_603, __p2_603) __extension__ ({ \ + uint32x4_t __ret_603; \ + uint16x8_t __s0_603 = __p0_603; \ + uint16x4_t __s1_603 = __p1_603; \ + __ret_603 = vmull_u16(vget_high_u16(__s0_603), splat_lane_u16(__s1_603, __p2_603)); \ + __ret_603; \ }) #else -#define vmull_high_lane_u16(__p0_588, __p1_588, __p2_588) __extension__ ({ \ - uint32x4_t __ret_588; \ - uint16x8_t __s0_588 = __p0_588; \ - uint16x4_t __s1_588 = __p1_588; \ - uint16x8_t __rev0_588; __rev0_588 = __builtin_shufflevector(__s0_588, __s0_588, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev1_588; __rev1_588 = __builtin_shufflevector(__s1_588, __s1_588, 3, 2, 1, 0); \ - __ret_588 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_588), __noswap_splat_lane_u16(__rev1_588, __p2_588)); \ - __ret_588 = __builtin_shufflevector(__ret_588, __ret_588, 3, 2, 1, 0); \ - __ret_588; \ +#define vmull_high_lane_u16(__p0_604, __p1_604, __p2_604) __extension__ ({ \ + uint32x4_t __ret_604; \ + uint16x8_t __s0_604 = __p0_604; \ + uint16x4_t __s1_604 = __p1_604; \ + uint16x8_t __rev0_604; __rev0_604 = __builtin_shufflevector(__s0_604, __s0_604, __lane_reverse_128_16); \ + uint16x4_t __rev1_604; __rev1_604 = __builtin_shufflevector(__s1_604, __s1_604, __lane_reverse_64_16); \ + __ret_604 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_604), __noswap_splat_lane_u16(__rev1_604, __p2_604)); \ + __ret_604 = __builtin_shufflevector(__ret_604, __ret_604, __lane_reverse_128_32); \ + __ret_604; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_s32(__p0_589, __p1_589, __p2_589) __extension__ ({ \ - int64x2_t __ret_589; \ - int32x4_t __s0_589 = __p0_589; \ - int32x2_t __s1_589 = __p1_589; \ - __ret_589 = vmull_s32(vget_high_s32(__s0_589), splat_lane_s32(__s1_589, __p2_589)); \ - __ret_589; \ +#define vmull_high_lane_s32(__p0_605, __p1_605, __p2_605) __extension__ ({ \ + int64x2_t __ret_605; \ + int32x4_t __s0_605 = __p0_605; \ + int32x2_t __s1_605 = __p1_605; \ + __ret_605 = vmull_s32(vget_high_s32(__s0_605), splat_lane_s32(__s1_605, __p2_605)); \ + __ret_605; \ }) #else -#define vmull_high_lane_s32(__p0_590, __p1_590, __p2_590) __extension__ ({ \ - int64x2_t __ret_590; \ - int32x4_t __s0_590 = __p0_590; \ - int32x2_t __s1_590 = __p1_590; \ - int32x4_t __rev0_590; __rev0_590 = __builtin_shufflevector(__s0_590, __s0_590, 3, 2, 1, 0); \ - int32x2_t __rev1_590; __rev1_590 = __builtin_shufflevector(__s1_590, __s1_590, 1, 0); \ - __ret_590 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_590), __noswap_splat_lane_s32(__rev1_590, __p2_590)); \ - __ret_590 = __builtin_shufflevector(__ret_590, __ret_590, 1, 0); \ - 
__ret_590; \ +#define vmull_high_lane_s32(__p0_606, __p1_606, __p2_606) __extension__ ({ \ + int64x2_t __ret_606; \ + int32x4_t __s0_606 = __p0_606; \ + int32x2_t __s1_606 = __p1_606; \ + int32x4_t __rev0_606; __rev0_606 = __builtin_shufflevector(__s0_606, __s0_606, __lane_reverse_128_32); \ + int32x2_t __rev1_606; __rev1_606 = __builtin_shufflevector(__s1_606, __s1_606, __lane_reverse_64_32); \ + __ret_606 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_606), __noswap_splat_lane_s32(__rev1_606, __p2_606)); \ + __ret_606 = __builtin_shufflevector(__ret_606, __ret_606, __lane_reverse_128_64); \ + __ret_606; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_s16(__p0_591, __p1_591, __p2_591) __extension__ ({ \ - int32x4_t __ret_591; \ - int16x8_t __s0_591 = __p0_591; \ - int16x4_t __s1_591 = __p1_591; \ - __ret_591 = vmull_s16(vget_high_s16(__s0_591), splat_lane_s16(__s1_591, __p2_591)); \ - __ret_591; \ +#define vmull_high_lane_s16(__p0_607, __p1_607, __p2_607) __extension__ ({ \ + int32x4_t __ret_607; \ + int16x8_t __s0_607 = __p0_607; \ + int16x4_t __s1_607 = __p1_607; \ + __ret_607 = vmull_s16(vget_high_s16(__s0_607), splat_lane_s16(__s1_607, __p2_607)); \ + __ret_607; \ }) #else -#define vmull_high_lane_s16(__p0_592, __p1_592, __p2_592) __extension__ ({ \ - int32x4_t __ret_592; \ - int16x8_t __s0_592 = __p0_592; \ - int16x4_t __s1_592 = __p1_592; \ - int16x8_t __rev0_592; __rev0_592 = __builtin_shufflevector(__s0_592, __s0_592, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1_592; __rev1_592 = __builtin_shufflevector(__s1_592, __s1_592, 3, 2, 1, 0); \ - __ret_592 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_592), __noswap_splat_lane_s16(__rev1_592, __p2_592)); \ - __ret_592 = __builtin_shufflevector(__ret_592, __ret_592, 3, 2, 1, 0); \ - __ret_592; \ +#define vmull_high_lane_s16(__p0_608, __p1_608, __p2_608) __extension__ ({ \ + int32x4_t __ret_608; \ + int16x8_t __s0_608 = __p0_608; \ + int16x4_t __s1_608 = __p1_608; \ + int16x8_t __rev0_608; __rev0_608 = __builtin_shufflevector(__s0_608, __s0_608, __lane_reverse_128_16); \ + int16x4_t __rev1_608; __rev1_608 = __builtin_shufflevector(__s1_608, __s1_608, __lane_reverse_64_16); \ + __ret_608 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_608), __noswap_splat_lane_s16(__rev1_608, __p2_608)); \ + __ret_608 = __builtin_shufflevector(__ret_608, __ret_608, __lane_reverse_128_32); \ + __ret_608; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_u32(__p0_593, __p1_593, __p2_593) __extension__ ({ \ - uint64x2_t __ret_593; \ - uint32x4_t __s0_593 = __p0_593; \ - uint32x4_t __s1_593 = __p1_593; \ - __ret_593 = vmull_u32(vget_high_u32(__s0_593), splat_laneq_u32(__s1_593, __p2_593)); \ - __ret_593; \ +#define vmull_high_laneq_u32(__p0_609, __p1_609, __p2_609) __extension__ ({ \ + uint64x2_t __ret_609; \ + uint32x4_t __s0_609 = __p0_609; \ + uint32x4_t __s1_609 = __p1_609; \ + __ret_609 = vmull_u32(vget_high_u32(__s0_609), splat_laneq_u32(__s1_609, __p2_609)); \ + __ret_609; \ }) #else -#define vmull_high_laneq_u32(__p0_594, __p1_594, __p2_594) __extension__ ({ \ - uint64x2_t __ret_594; \ - uint32x4_t __s0_594 = __p0_594; \ - uint32x4_t __s1_594 = __p1_594; \ - uint32x4_t __rev0_594; __rev0_594 = __builtin_shufflevector(__s0_594, __s0_594, 3, 2, 1, 0); \ - uint32x4_t __rev1_594; __rev1_594 = __builtin_shufflevector(__s1_594, __s1_594, 3, 2, 1, 0); \ - __ret_594 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_594), __noswap_splat_laneq_u32(__rev1_594, __p2_594)); \ - __ret_594 = __builtin_shufflevector(__ret_594, 
__ret_594, 1, 0); \ - __ret_594; \ +#define vmull_high_laneq_u32(__p0_610, __p1_610, __p2_610) __extension__ ({ \ + uint64x2_t __ret_610; \ + uint32x4_t __s0_610 = __p0_610; \ + uint32x4_t __s1_610 = __p1_610; \ + uint32x4_t __rev0_610; __rev0_610 = __builtin_shufflevector(__s0_610, __s0_610, __lane_reverse_128_32); \ + uint32x4_t __rev1_610; __rev1_610 = __builtin_shufflevector(__s1_610, __s1_610, __lane_reverse_128_32); \ + __ret_610 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_610), __noswap_splat_laneq_u32(__rev1_610, __p2_610)); \ + __ret_610 = __builtin_shufflevector(__ret_610, __ret_610, __lane_reverse_128_64); \ + __ret_610; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_u16(__p0_595, __p1_595, __p2_595) __extension__ ({ \ - uint32x4_t __ret_595; \ - uint16x8_t __s0_595 = __p0_595; \ - uint16x8_t __s1_595 = __p1_595; \ - __ret_595 = vmull_u16(vget_high_u16(__s0_595), splat_laneq_u16(__s1_595, __p2_595)); \ - __ret_595; \ +#define vmull_high_laneq_u16(__p0_611, __p1_611, __p2_611) __extension__ ({ \ + uint32x4_t __ret_611; \ + uint16x8_t __s0_611 = __p0_611; \ + uint16x8_t __s1_611 = __p1_611; \ + __ret_611 = vmull_u16(vget_high_u16(__s0_611), splat_laneq_u16(__s1_611, __p2_611)); \ + __ret_611; \ }) #else -#define vmull_high_laneq_u16(__p0_596, __p1_596, __p2_596) __extension__ ({ \ - uint32x4_t __ret_596; \ - uint16x8_t __s0_596 = __p0_596; \ - uint16x8_t __s1_596 = __p1_596; \ - uint16x8_t __rev0_596; __rev0_596 = __builtin_shufflevector(__s0_596, __s0_596, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_596; __rev1_596 = __builtin_shufflevector(__s1_596, __s1_596, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_596 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_596), __noswap_splat_laneq_u16(__rev1_596, __p2_596)); \ - __ret_596 = __builtin_shufflevector(__ret_596, __ret_596, 3, 2, 1, 0); \ - __ret_596; \ +#define vmull_high_laneq_u16(__p0_612, __p1_612, __p2_612) __extension__ ({ \ + uint32x4_t __ret_612; \ + uint16x8_t __s0_612 = __p0_612; \ + uint16x8_t __s1_612 = __p1_612; \ + uint16x8_t __rev0_612; __rev0_612 = __builtin_shufflevector(__s0_612, __s0_612, __lane_reverse_128_16); \ + uint16x8_t __rev1_612; __rev1_612 = __builtin_shufflevector(__s1_612, __s1_612, __lane_reverse_128_16); \ + __ret_612 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_612), __noswap_splat_laneq_u16(__rev1_612, __p2_612)); \ + __ret_612 = __builtin_shufflevector(__ret_612, __ret_612, __lane_reverse_128_32); \ + __ret_612; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_s32(__p0_597, __p1_597, __p2_597) __extension__ ({ \ - int64x2_t __ret_597; \ - int32x4_t __s0_597 = __p0_597; \ - int32x4_t __s1_597 = __p1_597; \ - __ret_597 = vmull_s32(vget_high_s32(__s0_597), splat_laneq_s32(__s1_597, __p2_597)); \ - __ret_597; \ +#define vmull_high_laneq_s32(__p0_613, __p1_613, __p2_613) __extension__ ({ \ + int64x2_t __ret_613; \ + int32x4_t __s0_613 = __p0_613; \ + int32x4_t __s1_613 = __p1_613; \ + __ret_613 = vmull_s32(vget_high_s32(__s0_613), splat_laneq_s32(__s1_613, __p2_613)); \ + __ret_613; \ }) #else -#define vmull_high_laneq_s32(__p0_598, __p1_598, __p2_598) __extension__ ({ \ - int64x2_t __ret_598; \ - int32x4_t __s0_598 = __p0_598; \ - int32x4_t __s1_598 = __p1_598; \ - int32x4_t __rev0_598; __rev0_598 = __builtin_shufflevector(__s0_598, __s0_598, 3, 2, 1, 0); \ - int32x4_t __rev1_598; __rev1_598 = __builtin_shufflevector(__s1_598, __s1_598, 3, 2, 1, 0); \ - __ret_598 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_598), __noswap_splat_laneq_s32(__rev1_598, 
__p2_598)); \ - __ret_598 = __builtin_shufflevector(__ret_598, __ret_598, 1, 0); \ - __ret_598; \ +#define vmull_high_laneq_s32(__p0_614, __p1_614, __p2_614) __extension__ ({ \ + int64x2_t __ret_614; \ + int32x4_t __s0_614 = __p0_614; \ + int32x4_t __s1_614 = __p1_614; \ + int32x4_t __rev0_614; __rev0_614 = __builtin_shufflevector(__s0_614, __s0_614, __lane_reverse_128_32); \ + int32x4_t __rev1_614; __rev1_614 = __builtin_shufflevector(__s1_614, __s1_614, __lane_reverse_128_32); \ + __ret_614 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_614), __noswap_splat_laneq_s32(__rev1_614, __p2_614)); \ + __ret_614 = __builtin_shufflevector(__ret_614, __ret_614, __lane_reverse_128_64); \ + __ret_614; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_s16(__p0_599, __p1_599, __p2_599) __extension__ ({ \ - int32x4_t __ret_599; \ - int16x8_t __s0_599 = __p0_599; \ - int16x8_t __s1_599 = __p1_599; \ - __ret_599 = vmull_s16(vget_high_s16(__s0_599), splat_laneq_s16(__s1_599, __p2_599)); \ - __ret_599; \ +#define vmull_high_laneq_s16(__p0_615, __p1_615, __p2_615) __extension__ ({ \ + int32x4_t __ret_615; \ + int16x8_t __s0_615 = __p0_615; \ + int16x8_t __s1_615 = __p1_615; \ + __ret_615 = vmull_s16(vget_high_s16(__s0_615), splat_laneq_s16(__s1_615, __p2_615)); \ + __ret_615; \ }) #else -#define vmull_high_laneq_s16(__p0_600, __p1_600, __p2_600) __extension__ ({ \ - int32x4_t __ret_600; \ - int16x8_t __s0_600 = __p0_600; \ - int16x8_t __s1_600 = __p1_600; \ - int16x8_t __rev0_600; __rev0_600 = __builtin_shufflevector(__s0_600, __s0_600, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_600; __rev1_600 = __builtin_shufflevector(__s1_600, __s1_600, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_600 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_600), __noswap_splat_laneq_s16(__rev1_600, __p2_600)); \ - __ret_600 = __builtin_shufflevector(__ret_600, __ret_600, 3, 2, 1, 0); \ - __ret_600; \ +#define vmull_high_laneq_s16(__p0_616, __p1_616, __p2_616) __extension__ ({ \ + int32x4_t __ret_616; \ + int16x8_t __s0_616 = __p0_616; \ + int16x8_t __s1_616 = __p1_616; \ + int16x8_t __rev0_616; __rev0_616 = __builtin_shufflevector(__s0_616, __s0_616, __lane_reverse_128_16); \ + int16x8_t __rev1_616; __rev1_616 = __builtin_shufflevector(__s1_616, __s1_616, __lane_reverse_128_16); \ + __ret_616 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_616), __noswap_splat_laneq_s16(__rev1_616, __p2_616)); \ + __ret_616 = __builtin_shufflevector(__ret_616, __ret_616, __lane_reverse_128_32); \ + __ret_616; \ }) #endif @@ -54979,9 +56637,9 @@ __ai __attribute__((target("neon"))) uint64x2_t vmull_high_n_u32(uint32x4_t __p0 #else __ai __attribute__((target("neon"))) uint64x2_t vmull_high_n_u32(uint32x4_t __p0, uint32_t __p1) { uint64x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __noswap_vmull_n_u32(__noswap_vget_high_u32(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -54995,9 +56653,9 @@ __ai __attribute__((target("neon"))) uint32x4_t vmull_high_n_u16(uint16x8_t __p0 #else __ai __attribute__((target("neon"))) uint32x4_t vmull_high_n_u16(uint16x8_t __p0, uint16_t __p1) { uint32x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); 
__ret = __noswap_vmull_n_u16(__noswap_vget_high_u16(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55011,9 +56669,9 @@ __ai __attribute__((target("neon"))) int64x2_t vmull_high_n_s32(int32x4_t __p0, #else __ai __attribute__((target("neon"))) int64x2_t vmull_high_n_s32(int32x4_t __p0, int32_t __p1) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __noswap_vmull_n_s32(__noswap_vget_high_s32(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55027,115 +56685,115 @@ __ai __attribute__((target("neon"))) int32x4_t vmull_high_n_s16(int16x8_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __noswap_vmull_n_s16(__noswap_vget_high_s16(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_u32(__p0_601, __p1_601, __p2_601) __extension__ ({ \ - uint64x2_t __ret_601; \ - uint32x2_t __s0_601 = __p0_601; \ - uint32x4_t __s1_601 = __p1_601; \ - __ret_601 = vmull_u32(__s0_601, splat_laneq_u32(__s1_601, __p2_601)); \ - __ret_601; \ +#define vmull_laneq_u32(__p0_617, __p1_617, __p2_617) __extension__ ({ \ + uint64x2_t __ret_617; \ + uint32x2_t __s0_617 = __p0_617; \ + uint32x4_t __s1_617 = __p1_617; \ + __ret_617 = vmull_u32(__s0_617, splat_laneq_u32(__s1_617, __p2_617)); \ + __ret_617; \ }) #else -#define vmull_laneq_u32(__p0_602, __p1_602, __p2_602) __extension__ ({ \ - uint64x2_t __ret_602; \ - uint32x2_t __s0_602 = __p0_602; \ - uint32x4_t __s1_602 = __p1_602; \ - uint32x2_t __rev0_602; __rev0_602 = __builtin_shufflevector(__s0_602, __s0_602, 1, 0); \ - uint32x4_t __rev1_602; __rev1_602 = __builtin_shufflevector(__s1_602, __s1_602, 3, 2, 1, 0); \ - __ret_602 = __noswap_vmull_u32(__rev0_602, __noswap_splat_laneq_u32(__rev1_602, __p2_602)); \ - __ret_602 = __builtin_shufflevector(__ret_602, __ret_602, 1, 0); \ - __ret_602; \ +#define vmull_laneq_u32(__p0_618, __p1_618, __p2_618) __extension__ ({ \ + uint64x2_t __ret_618; \ + uint32x2_t __s0_618 = __p0_618; \ + uint32x4_t __s1_618 = __p1_618; \ + uint32x2_t __rev0_618; __rev0_618 = __builtin_shufflevector(__s0_618, __s0_618, __lane_reverse_64_32); \ + uint32x4_t __rev1_618; __rev1_618 = __builtin_shufflevector(__s1_618, __s1_618, __lane_reverse_128_32); \ + __ret_618 = __noswap_vmull_u32(__rev0_618, __noswap_splat_laneq_u32(__rev1_618, __p2_618)); \ + __ret_618 = __builtin_shufflevector(__ret_618, __ret_618, __lane_reverse_128_64); \ + __ret_618; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_u16(__p0_603, __p1_603, __p2_603) __extension__ ({ \ - uint32x4_t __ret_603; \ - uint16x4_t __s0_603 = __p0_603; \ - uint16x8_t __s1_603 = __p1_603; \ - __ret_603 = vmull_u16(__s0_603, splat_laneq_u16(__s1_603, __p2_603)); \ - __ret_603; \ +#define vmull_laneq_u16(__p0_619, __p1_619, __p2_619) __extension__ ({ \ + uint32x4_t __ret_619; \ + uint16x4_t 
__s0_619 = __p0_619; \ + uint16x8_t __s1_619 = __p1_619; \ + __ret_619 = vmull_u16(__s0_619, splat_laneq_u16(__s1_619, __p2_619)); \ + __ret_619; \ }) #else -#define vmull_laneq_u16(__p0_604, __p1_604, __p2_604) __extension__ ({ \ - uint32x4_t __ret_604; \ - uint16x4_t __s0_604 = __p0_604; \ - uint16x8_t __s1_604 = __p1_604; \ - uint16x4_t __rev0_604; __rev0_604 = __builtin_shufflevector(__s0_604, __s0_604, 3, 2, 1, 0); \ - uint16x8_t __rev1_604; __rev1_604 = __builtin_shufflevector(__s1_604, __s1_604, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_604 = __noswap_vmull_u16(__rev0_604, __noswap_splat_laneq_u16(__rev1_604, __p2_604)); \ - __ret_604 = __builtin_shufflevector(__ret_604, __ret_604, 3, 2, 1, 0); \ - __ret_604; \ +#define vmull_laneq_u16(__p0_620, __p1_620, __p2_620) __extension__ ({ \ + uint32x4_t __ret_620; \ + uint16x4_t __s0_620 = __p0_620; \ + uint16x8_t __s1_620 = __p1_620; \ + uint16x4_t __rev0_620; __rev0_620 = __builtin_shufflevector(__s0_620, __s0_620, __lane_reverse_64_16); \ + uint16x8_t __rev1_620; __rev1_620 = __builtin_shufflevector(__s1_620, __s1_620, __lane_reverse_128_16); \ + __ret_620 = __noswap_vmull_u16(__rev0_620, __noswap_splat_laneq_u16(__rev1_620, __p2_620)); \ + __ret_620 = __builtin_shufflevector(__ret_620, __ret_620, __lane_reverse_128_32); \ + __ret_620; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_s32(__p0_605, __p1_605, __p2_605) __extension__ ({ \ - int64x2_t __ret_605; \ - int32x2_t __s0_605 = __p0_605; \ - int32x4_t __s1_605 = __p1_605; \ - __ret_605 = vmull_s32(__s0_605, splat_laneq_s32(__s1_605, __p2_605)); \ - __ret_605; \ +#define vmull_laneq_s32(__p0_621, __p1_621, __p2_621) __extension__ ({ \ + int64x2_t __ret_621; \ + int32x2_t __s0_621 = __p0_621; \ + int32x4_t __s1_621 = __p1_621; \ + __ret_621 = vmull_s32(__s0_621, splat_laneq_s32(__s1_621, __p2_621)); \ + __ret_621; \ }) #else -#define vmull_laneq_s32(__p0_606, __p1_606, __p2_606) __extension__ ({ \ - int64x2_t __ret_606; \ - int32x2_t __s0_606 = __p0_606; \ - int32x4_t __s1_606 = __p1_606; \ - int32x2_t __rev0_606; __rev0_606 = __builtin_shufflevector(__s0_606, __s0_606, 1, 0); \ - int32x4_t __rev1_606; __rev1_606 = __builtin_shufflevector(__s1_606, __s1_606, 3, 2, 1, 0); \ - __ret_606 = __noswap_vmull_s32(__rev0_606, __noswap_splat_laneq_s32(__rev1_606, __p2_606)); \ - __ret_606 = __builtin_shufflevector(__ret_606, __ret_606, 1, 0); \ - __ret_606; \ +#define vmull_laneq_s32(__p0_622, __p1_622, __p2_622) __extension__ ({ \ + int64x2_t __ret_622; \ + int32x2_t __s0_622 = __p0_622; \ + int32x4_t __s1_622 = __p1_622; \ + int32x2_t __rev0_622; __rev0_622 = __builtin_shufflevector(__s0_622, __s0_622, __lane_reverse_64_32); \ + int32x4_t __rev1_622; __rev1_622 = __builtin_shufflevector(__s1_622, __s1_622, __lane_reverse_128_32); \ + __ret_622 = __noswap_vmull_s32(__rev0_622, __noswap_splat_laneq_s32(__rev1_622, __p2_622)); \ + __ret_622 = __builtin_shufflevector(__ret_622, __ret_622, __lane_reverse_128_64); \ + __ret_622; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_s16(__p0_607, __p1_607, __p2_607) __extension__ ({ \ - int32x4_t __ret_607; \ - int16x4_t __s0_607 = __p0_607; \ - int16x8_t __s1_607 = __p1_607; \ - __ret_607 = vmull_s16(__s0_607, splat_laneq_s16(__s1_607, __p2_607)); \ - __ret_607; \ +#define vmull_laneq_s16(__p0_623, __p1_623, __p2_623) __extension__ ({ \ + int32x4_t __ret_623; \ + int16x4_t __s0_623 = __p0_623; \ + int16x8_t __s1_623 = __p1_623; \ + __ret_623 = vmull_s16(__s0_623, splat_laneq_s16(__s1_623, __p2_623)); \ + __ret_623; \ }) #else -#define 
vmull_laneq_s16(__p0_608, __p1_608, __p2_608) __extension__ ({ \ - int32x4_t __ret_608; \ - int16x4_t __s0_608 = __p0_608; \ - int16x8_t __s1_608 = __p1_608; \ - int16x4_t __rev0_608; __rev0_608 = __builtin_shufflevector(__s0_608, __s0_608, 3, 2, 1, 0); \ - int16x8_t __rev1_608; __rev1_608 = __builtin_shufflevector(__s1_608, __s1_608, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_608 = __noswap_vmull_s16(__rev0_608, __noswap_splat_laneq_s16(__rev1_608, __p2_608)); \ - __ret_608 = __builtin_shufflevector(__ret_608, __ret_608, 3, 2, 1, 0); \ - __ret_608; \ +#define vmull_laneq_s16(__p0_624, __p1_624, __p2_624) __extension__ ({ \ + int32x4_t __ret_624; \ + int16x4_t __s0_624 = __p0_624; \ + int16x8_t __s1_624 = __p1_624; \ + int16x4_t __rev0_624; __rev0_624 = __builtin_shufflevector(__s0_624, __s0_624, __lane_reverse_64_16); \ + int16x8_t __rev1_624; __rev1_624 = __builtin_shufflevector(__s1_624, __s1_624, __lane_reverse_128_16); \ + __ret_624 = __noswap_vmull_s16(__rev0_624, __noswap_splat_laneq_s16(__rev1_624, __p2_624)); \ + __ret_624 = __builtin_shufflevector(__ret_624, __ret_624, __lane_reverse_128_32); \ + __ret_624; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vmulxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vmulxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) float64x2_t __noswap_vmulxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vmulxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #endif @@ -55143,251 +56801,251 @@ __ai __attribute__((target("neon"))) float64x2_t __noswap_vmulxq_f64(float64x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmulxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = 
(float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmulxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) float32x4_t __noswap_vmulxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vmulxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vmulxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vmulx_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vmulx_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmulx_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vmulx_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmulx_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } __ai __attribute__((target("neon"))) float32x2_t __noswap_vmulx_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vmulx_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vmulx_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #endif __ai __attribute__((target("neon"))) float64_t vmulxd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vmulxd_f64(__p0, __p1); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vmulxd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) float32_t vmulxs_f32(float32_t __p0, float32_t __p1) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vmulxs_f32(__p0, __p1)); return __ret; } -#define vmulxd_lane_f64(__p0_609, __p1_609, __p2_609) __extension__ ({ \ - float64_t __ret_609; \ - float64_t __s0_609 = __p0_609; \ - 
float64x1_t __s1_609 = __p1_609; \ - __ret_609 = vmulxd_f64(__s0_609, vget_lane_f64(__s1_609, __p2_609)); \ - __ret_609; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vmulxs_lane_f32(__p0_610, __p1_610, __p2_610) __extension__ ({ \ - float32_t __ret_610; \ - float32_t __s0_610 = __p0_610; \ - float32x2_t __s1_610 = __p1_610; \ - __ret_610 = vmulxs_f32(__s0_610, vget_lane_f32(__s1_610, __p2_610)); \ - __ret_610; \ -}) -#else -#define vmulxs_lane_f32(__p0_611, __p1_611, __p2_611) __extension__ ({ \ - float32_t __ret_611; \ - float32_t __s0_611 = __p0_611; \ - float32x2_t __s1_611 = __p1_611; \ - float32x2_t __rev1_611; __rev1_611 = __builtin_shufflevector(__s1_611, __s1_611, 1, 0); \ - __ret_611 = vmulxs_f32(__s0_611, __noswap_vget_lane_f32(__rev1_611, __p2_611)); \ - __ret_611; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f64(__p0_612, __p1_612, __p2_612) __extension__ ({ \ - float64x2_t __ret_612; \ - float64x2_t __s0_612 = __p0_612; \ - float64x1_t __s1_612 = __p1_612; \ - __ret_612 = vmulxq_f64(__s0_612, splatq_lane_f64(__s1_612, __p2_612)); \ - __ret_612; \ -}) -#else -#define vmulxq_lane_f64(__p0_613, __p1_613, __p2_613) __extension__ ({ \ - float64x2_t __ret_613; \ - float64x2_t __s0_613 = __p0_613; \ - float64x1_t __s1_613 = __p1_613; \ - float64x2_t __rev0_613; __rev0_613 = __builtin_shufflevector(__s0_613, __s0_613, 1, 0); \ - __ret_613 = __noswap_vmulxq_f64(__rev0_613, __noswap_splatq_lane_f64(__s1_613, __p2_613)); \ - __ret_613 = __builtin_shufflevector(__ret_613, __ret_613, 1, 0); \ - __ret_613; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f32(__p0_614, __p1_614, __p2_614) __extension__ ({ \ - float32x4_t __ret_614; \ - float32x4_t __s0_614 = __p0_614; \ - float32x2_t __s1_614 = __p1_614; \ - __ret_614 = vmulxq_f32(__s0_614, splatq_lane_f32(__s1_614, __p2_614)); \ - __ret_614; \ -}) -#else -#define vmulxq_lane_f32(__p0_615, __p1_615, __p2_615) __extension__ ({ \ - float32x4_t __ret_615; \ - float32x4_t __s0_615 = __p0_615; \ - float32x2_t __s1_615 = __p1_615; \ - float32x4_t __rev0_615; __rev0_615 = __builtin_shufflevector(__s0_615, __s0_615, 3, 2, 1, 0); \ - float32x2_t __rev1_615; __rev1_615 = __builtin_shufflevector(__s1_615, __s1_615, 1, 0); \ - __ret_615 = __noswap_vmulxq_f32(__rev0_615, __noswap_splatq_lane_f32(__rev1_615, __p2_615)); \ - __ret_615 = __builtin_shufflevector(__ret_615, __ret_615, 3, 2, 1, 0); \ - __ret_615; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulx_lane_f32(__p0_616, __p1_616, __p2_616) __extension__ ({ \ - float32x2_t __ret_616; \ - float32x2_t __s0_616 = __p0_616; \ - float32x2_t __s1_616 = __p1_616; \ - __ret_616 = vmulx_f32(__s0_616, splat_lane_f32(__s1_616, __p2_616)); \ - __ret_616; \ -}) -#else -#define vmulx_lane_f32(__p0_617, __p1_617, __p2_617) __extension__ ({ \ - float32x2_t __ret_617; \ - float32x2_t __s0_617 = __p0_617; \ - float32x2_t __s1_617 = __p1_617; \ - float32x2_t __rev0_617; __rev0_617 = __builtin_shufflevector(__s0_617, __s0_617, 1, 0); \ - float32x2_t __rev1_617; __rev1_617 = __builtin_shufflevector(__s1_617, __s1_617, 1, 0); \ - __ret_617 = __noswap_vmulx_f32(__rev0_617, __noswap_splat_lane_f32(__rev1_617, __p2_617)); \ - __ret_617 = __builtin_shufflevector(__ret_617, __ret_617, 1, 0); \ - __ret_617; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxd_laneq_f64(__p0_618, __p1_618, __p2_618) __extension__ ({ \ - float64_t __ret_618; \ - float64_t __s0_618 = __p0_618; \ - float64x2_t __s1_618 = __p1_618; \ - __ret_618 = vmulxd_f64(__s0_618, vgetq_lane_f64(__s1_618, __p2_618)); \ - 
__ret_618; \ -}) -#else -#define vmulxd_laneq_f64(__p0_619, __p1_619, __p2_619) __extension__ ({ \ - float64_t __ret_619; \ - float64_t __s0_619 = __p0_619; \ - float64x2_t __s1_619 = __p1_619; \ - float64x2_t __rev1_619; __rev1_619 = __builtin_shufflevector(__s1_619, __s1_619, 1, 0); \ - __ret_619 = vmulxd_f64(__s0_619, __noswap_vgetq_lane_f64(__rev1_619, __p2_619)); \ - __ret_619; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxs_laneq_f32(__p0_620, __p1_620, __p2_620) __extension__ ({ \ - float32_t __ret_620; \ - float32_t __s0_620 = __p0_620; \ - float32x4_t __s1_620 = __p1_620; \ - __ret_620 = vmulxs_f32(__s0_620, vgetq_lane_f32(__s1_620, __p2_620)); \ - __ret_620; \ -}) -#else -#define vmulxs_laneq_f32(__p0_621, __p1_621, __p2_621) __extension__ ({ \ - float32_t __ret_621; \ - float32_t __s0_621 = __p0_621; \ - float32x4_t __s1_621 = __p1_621; \ - float32x4_t __rev1_621; __rev1_621 = __builtin_shufflevector(__s1_621, __s1_621, 3, 2, 1, 0); \ - __ret_621 = vmulxs_f32(__s0_621, __noswap_vgetq_lane_f32(__rev1_621, __p2_621)); \ - __ret_621; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxq_laneq_f64(__p0_622, __p1_622, __p2_622) __extension__ ({ \ - float64x2_t __ret_622; \ - float64x2_t __s0_622 = __p0_622; \ - float64x2_t __s1_622 = __p1_622; \ - __ret_622 = vmulxq_f64(__s0_622, splatq_laneq_f64(__s1_622, __p2_622)); \ - __ret_622; \ -}) -#else -#define vmulxq_laneq_f64(__p0_623, __p1_623, __p2_623) __extension__ ({ \ - float64x2_t __ret_623; \ - float64x2_t __s0_623 = __p0_623; \ - float64x2_t __s1_623 = __p1_623; \ - float64x2_t __rev0_623; __rev0_623 = __builtin_shufflevector(__s0_623, __s0_623, 1, 0); \ - float64x2_t __rev1_623; __rev1_623 = __builtin_shufflevector(__s1_623, __s1_623, 1, 0); \ - __ret_623 = __noswap_vmulxq_f64(__rev0_623, __noswap_splatq_laneq_f64(__rev1_623, __p2_623)); \ - __ret_623 = __builtin_shufflevector(__ret_623, __ret_623, 1, 0); \ - __ret_623; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulxq_laneq_f32(__p0_624, __p1_624, __p2_624) __extension__ ({ \ - float32x4_t __ret_624; \ - float32x4_t __s0_624 = __p0_624; \ - float32x4_t __s1_624 = __p1_624; \ - __ret_624 = vmulxq_f32(__s0_624, splatq_laneq_f32(__s1_624, __p2_624)); \ - __ret_624; \ -}) -#else -#define vmulxq_laneq_f32(__p0_625, __p1_625, __p2_625) __extension__ ({ \ - float32x4_t __ret_625; \ - float32x4_t __s0_625 = __p0_625; \ - float32x4_t __s1_625 = __p1_625; \ - float32x4_t __rev0_625; __rev0_625 = __builtin_shufflevector(__s0_625, __s0_625, 3, 2, 1, 0); \ - float32x4_t __rev1_625; __rev1_625 = __builtin_shufflevector(__s1_625, __s1_625, 3, 2, 1, 0); \ - __ret_625 = __noswap_vmulxq_f32(__rev0_625, __noswap_splatq_laneq_f32(__rev1_625, __p2_625)); \ - __ret_625 = __builtin_shufflevector(__ret_625, __ret_625, 3, 2, 1, 0); \ +#define vmulxd_lane_f64(__p0_625, __p1_625, __p2_625) __extension__ ({ \ + float64_t __ret_625; \ + float64_t __s0_625 = __p0_625; \ + float64x1_t __s1_625 = __p1_625; \ + __ret_625 = vmulxd_f64(__s0_625, vget_lane_f64(__s1_625, __p2_625)); \ __ret_625; \ }) -#endif - #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f32(__p0_626, __p1_626, __p2_626) __extension__ ({ \ - float32x2_t __ret_626; \ - float32x2_t __s0_626 = __p0_626; \ - float32x4_t __s1_626 = __p1_626; \ - __ret_626 = vmulx_f32(__s0_626, splat_laneq_f32(__s1_626, __p2_626)); \ +#define vmulxs_lane_f32(__p0_626, __p1_626, __p2_626) __extension__ ({ \ + float32_t __ret_626; \ + float32_t __s0_626 = __p0_626; \ + float32x2_t __s1_626 = __p1_626; \ + __ret_626 = vmulxs_f32(__s0_626, 
vget_lane_f32(__s1_626, __p2_626)); \ __ret_626; \ }) #else -#define vmulx_laneq_f32(__p0_627, __p1_627, __p2_627) __extension__ ({ \ - float32x2_t __ret_627; \ - float32x2_t __s0_627 = __p0_627; \ - float32x4_t __s1_627 = __p1_627; \ - float32x2_t __rev0_627; __rev0_627 = __builtin_shufflevector(__s0_627, __s0_627, 1, 0); \ - float32x4_t __rev1_627; __rev1_627 = __builtin_shufflevector(__s1_627, __s1_627, 3, 2, 1, 0); \ - __ret_627 = __noswap_vmulx_f32(__rev0_627, __noswap_splat_laneq_f32(__rev1_627, __p2_627)); \ - __ret_627 = __builtin_shufflevector(__ret_627, __ret_627, 1, 0); \ +#define vmulxs_lane_f32(__p0_627, __p1_627, __p2_627) __extension__ ({ \ + float32_t __ret_627; \ + float32_t __s0_627 = __p0_627; \ + float32x2_t __s1_627 = __p1_627; \ + float32x2_t __rev1_627; __rev1_627 = __builtin_shufflevector(__s1_627, __s1_627, __lane_reverse_64_32); \ + __ret_627 = vmulxs_f32(__s0_627, __noswap_vget_lane_f32(__rev1_627, __p2_627)); \ __ret_627; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vmulxq_lane_f64(__p0_628, __p1_628, __p2_628) __extension__ ({ \ + float64x2_t __ret_628; \ + float64x2_t __s0_628 = __p0_628; \ + float64x1_t __s1_628 = __p1_628; \ + __ret_628 = vmulxq_f64(__s0_628, splatq_lane_f64(__s1_628, __p2_628)); \ + __ret_628; \ +}) +#else +#define vmulxq_lane_f64(__p0_629, __p1_629, __p2_629) __extension__ ({ \ + float64x2_t __ret_629; \ + float64x2_t __s0_629 = __p0_629; \ + float64x1_t __s1_629 = __p1_629; \ + float64x2_t __rev0_629; __rev0_629 = __builtin_shufflevector(__s0_629, __s0_629, __lane_reverse_128_64); \ + __ret_629 = __noswap_vmulxq_f64(__rev0_629, __noswap_splatq_lane_f64(__s1_629, __p2_629)); \ + __ret_629 = __builtin_shufflevector(__ret_629, __ret_629, __lane_reverse_128_64); \ + __ret_629; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulxq_lane_f32(__p0_630, __p1_630, __p2_630) __extension__ ({ \ + float32x4_t __ret_630; \ + float32x4_t __s0_630 = __p0_630; \ + float32x2_t __s1_630 = __p1_630; \ + __ret_630 = vmulxq_f32(__s0_630, splatq_lane_f32(__s1_630, __p2_630)); \ + __ret_630; \ +}) +#else +#define vmulxq_lane_f32(__p0_631, __p1_631, __p2_631) __extension__ ({ \ + float32x4_t __ret_631; \ + float32x4_t __s0_631 = __p0_631; \ + float32x2_t __s1_631 = __p1_631; \ + float32x4_t __rev0_631; __rev0_631 = __builtin_shufflevector(__s0_631, __s0_631, __lane_reverse_128_32); \ + float32x2_t __rev1_631; __rev1_631 = __builtin_shufflevector(__s1_631, __s1_631, __lane_reverse_64_32); \ + __ret_631 = __noswap_vmulxq_f32(__rev0_631, __noswap_splatq_lane_f32(__rev1_631, __p2_631)); \ + __ret_631 = __builtin_shufflevector(__ret_631, __ret_631, __lane_reverse_128_32); \ + __ret_631; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulx_lane_f32(__p0_632, __p1_632, __p2_632) __extension__ ({ \ + float32x2_t __ret_632; \ + float32x2_t __s0_632 = __p0_632; \ + float32x2_t __s1_632 = __p1_632; \ + __ret_632 = vmulx_f32(__s0_632, splat_lane_f32(__s1_632, __p2_632)); \ + __ret_632; \ +}) +#else +#define vmulx_lane_f32(__p0_633, __p1_633, __p2_633) __extension__ ({ \ + float32x2_t __ret_633; \ + float32x2_t __s0_633 = __p0_633; \ + float32x2_t __s1_633 = __p1_633; \ + float32x2_t __rev0_633; __rev0_633 = __builtin_shufflevector(__s0_633, __s0_633, __lane_reverse_64_32); \ + float32x2_t __rev1_633; __rev1_633 = __builtin_shufflevector(__s1_633, __s1_633, __lane_reverse_64_32); \ + __ret_633 = __noswap_vmulx_f32(__rev0_633, __noswap_splat_lane_f32(__rev1_633, __p2_633)); \ + __ret_633 = __builtin_shufflevector(__ret_633, __ret_633, __lane_reverse_64_32); \ + 
__ret_633; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulxd_laneq_f64(__p0_634, __p1_634, __p2_634) __extension__ ({ \ + float64_t __ret_634; \ + float64_t __s0_634 = __p0_634; \ + float64x2_t __s1_634 = __p1_634; \ + __ret_634 = vmulxd_f64(__s0_634, vgetq_lane_f64(__s1_634, __p2_634)); \ + __ret_634; \ +}) +#else +#define vmulxd_laneq_f64(__p0_635, __p1_635, __p2_635) __extension__ ({ \ + float64_t __ret_635; \ + float64_t __s0_635 = __p0_635; \ + float64x2_t __s1_635 = __p1_635; \ + float64x2_t __rev1_635; __rev1_635 = __builtin_shufflevector(__s1_635, __s1_635, __lane_reverse_128_64); \ + __ret_635 = vmulxd_f64(__s0_635, __noswap_vgetq_lane_f64(__rev1_635, __p2_635)); \ + __ret_635; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulxs_laneq_f32(__p0_636, __p1_636, __p2_636) __extension__ ({ \ + float32_t __ret_636; \ + float32_t __s0_636 = __p0_636; \ + float32x4_t __s1_636 = __p1_636; \ + __ret_636 = vmulxs_f32(__s0_636, vgetq_lane_f32(__s1_636, __p2_636)); \ + __ret_636; \ +}) +#else +#define vmulxs_laneq_f32(__p0_637, __p1_637, __p2_637) __extension__ ({ \ + float32_t __ret_637; \ + float32_t __s0_637 = __p0_637; \ + float32x4_t __s1_637 = __p1_637; \ + float32x4_t __rev1_637; __rev1_637 = __builtin_shufflevector(__s1_637, __s1_637, __lane_reverse_128_32); \ + __ret_637 = vmulxs_f32(__s0_637, __noswap_vgetq_lane_f32(__rev1_637, __p2_637)); \ + __ret_637; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulxq_laneq_f64(__p0_638, __p1_638, __p2_638) __extension__ ({ \ + float64x2_t __ret_638; \ + float64x2_t __s0_638 = __p0_638; \ + float64x2_t __s1_638 = __p1_638; \ + __ret_638 = vmulxq_f64(__s0_638, splatq_laneq_f64(__s1_638, __p2_638)); \ + __ret_638; \ +}) +#else +#define vmulxq_laneq_f64(__p0_639, __p1_639, __p2_639) __extension__ ({ \ + float64x2_t __ret_639; \ + float64x2_t __s0_639 = __p0_639; \ + float64x2_t __s1_639 = __p1_639; \ + float64x2_t __rev0_639; __rev0_639 = __builtin_shufflevector(__s0_639, __s0_639, __lane_reverse_128_64); \ + float64x2_t __rev1_639; __rev1_639 = __builtin_shufflevector(__s1_639, __s1_639, __lane_reverse_128_64); \ + __ret_639 = __noswap_vmulxq_f64(__rev0_639, __noswap_splatq_laneq_f64(__rev1_639, __p2_639)); \ + __ret_639 = __builtin_shufflevector(__ret_639, __ret_639, __lane_reverse_128_64); \ + __ret_639; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulxq_laneq_f32(__p0_640, __p1_640, __p2_640) __extension__ ({ \ + float32x4_t __ret_640; \ + float32x4_t __s0_640 = __p0_640; \ + float32x4_t __s1_640 = __p1_640; \ + __ret_640 = vmulxq_f32(__s0_640, splatq_laneq_f32(__s1_640, __p2_640)); \ + __ret_640; \ +}) +#else +#define vmulxq_laneq_f32(__p0_641, __p1_641, __p2_641) __extension__ ({ \ + float32x4_t __ret_641; \ + float32x4_t __s0_641 = __p0_641; \ + float32x4_t __s1_641 = __p1_641; \ + float32x4_t __rev0_641; __rev0_641 = __builtin_shufflevector(__s0_641, __s0_641, __lane_reverse_128_32); \ + float32x4_t __rev1_641; __rev1_641 = __builtin_shufflevector(__s1_641, __s1_641, __lane_reverse_128_32); \ + __ret_641 = __noswap_vmulxq_f32(__rev0_641, __noswap_splatq_laneq_f32(__rev1_641, __p2_641)); \ + __ret_641 = __builtin_shufflevector(__ret_641, __ret_641, __lane_reverse_128_32); \ + __ret_641; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulx_laneq_f32(__p0_642, __p1_642, __p2_642) __extension__ ({ \ + float32x2_t __ret_642; \ + float32x2_t __s0_642 = __p0_642; \ + float32x4_t __s1_642 = __p1_642; \ + __ret_642 = vmulx_f32(__s0_642, splat_laneq_f32(__s1_642, __p2_642)); \ + __ret_642; \ +}) +#else +#define 
vmulx_laneq_f32(__p0_643, __p1_643, __p2_643) __extension__ ({ \ + float32x2_t __ret_643; \ + float32x2_t __s0_643 = __p0_643; \ + float32x4_t __s1_643 = __p1_643; \ + float32x2_t __rev0_643; __rev0_643 = __builtin_shufflevector(__s0_643, __s0_643, __lane_reverse_64_32); \ + float32x4_t __rev1_643; __rev1_643 = __builtin_shufflevector(__s1_643, __s1_643, __lane_reverse_128_32); \ + __ret_643 = __noswap_vmulx_f32(__rev0_643, __noswap_splat_laneq_f32(__rev1_643, __p2_643)); \ + __ret_643 = __builtin_shufflevector(__ret_643, __ret_643, __lane_reverse_64_32); \ + __ret_643; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vnegq_f64(float64x2_t __p0) { float64x2_t __ret; @@ -55397,9 +57055,9 @@ __ai __attribute__((target("neon"))) float64x2_t vnegq_f64(float64x2_t __p0) { #else __ai __attribute__((target("neon"))) float64x2_t vnegq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55413,9 +57071,9 @@ __ai __attribute__((target("neon"))) int64x2_t vnegq_s64(int64x2_t __p0) { #else __ai __attribute__((target("neon"))) int64x2_t vnegq_s64(int64x2_t __p0) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); __ret = -__rev0; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55432,22 +57090,22 @@ __ai __attribute__((target("neon"))) int64x1_t vneg_s64(int64x1_t __p0) { } __ai __attribute__((target("neon"))) int64_t vnegd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vnegd_s64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vnegd_s64(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vpaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -55455,16 +57113,16 @@ __ai __attribute__((target("neon"))) uint8x16_t 
vpaddq_u8(uint8x16_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vpaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55472,16 +57130,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vpaddq_u32(uint32x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vpaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55489,16 +57147,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vpaddq_u64(uint64x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vpaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + 
uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -55506,16 +57164,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vpaddq_u16(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vpaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -55523,16 +57181,16 @@ __ai __attribute__((target("neon"))) int8x16_t vpaddq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vpaddq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55540,16 +57198,16 @@ __ai __attribute__((target("neon"))) float64x2_t vpaddq_f64(float64x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + 
__ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vpaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55557,16 +57215,16 @@ __ai __attribute__((target("neon"))) float32x4_t vpaddq_f32(float32x4_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vpaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55574,16 +57232,16 @@ __ai __attribute__((target("neon"))) int32x4_t vpaddq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vpaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, 
__rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55591,16 +57249,16 @@ __ai __attribute__((target("neon"))) int64x2_t vpaddq_s64(int64x2_t __p0, int64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vpaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vpaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -55608,14 +57266,14 @@ __ai __attribute__((target("neon"))) int16x8_t vpaddq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64_t vpaddd_u64(uint64x2_t __p0) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vpaddd_u64(__p0); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vpaddd_u64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) uint64_t vpaddd_u64(uint64x2_t __p0) { uint64_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (uint64_t) __builtin_neon_vpaddd_u64(__rev0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vpaddd_u64(__rev0)); return __ret; } #endif @@ -55623,14 +57281,14 @@ __ai __attribute__((target("neon"))) uint64_t vpaddd_u64(uint64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vpaddd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpaddd_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpaddd_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vpaddd_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vpaddd_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpaddd_f64(__rev0)); return __ret; } #endif @@ -55638,14 +57296,14 @@ __ai __attribute__((target("neon"))) float64_t vpaddd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64_t vpaddd_s64(int64x2_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vpaddd_s64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vpaddd_s64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) int64_t vpaddd_s64(int64x2_t __p0) { int64_t __ret; - int64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64_t) __builtin_neon_vpaddd_s64(__rev0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vpaddd_s64(__rev0)); return __ret; } #endif @@ -55653,14 +57311,14 @@ __ai __attribute__((target("neon"))) int64_t vpaddd_s64(int64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vpadds_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpadds_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpadds_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vpadds_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vpadds_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpadds_f32(__rev0)); return __ret; } #endif @@ -55668,16 +57326,16 @@ __ai __attribute__((target("neon"))) float32_t vpadds_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -55685,16 +57343,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vpmaxq_u8(uint8x16_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t 
__rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55702,16 +57360,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vpmaxq_u32(uint32x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -55719,16 +57377,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vpmaxq_u16(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -55736,16 +57394,16 @@ __ai __attribute__((target("neon"))) int8x16_t vpmaxq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, 
__builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vpmaxq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55753,16 +57411,16 @@ __ai __attribute__((target("neon"))) float64x2_t vpmaxq_f64(float64x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vpmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55770,16 +57428,16 @@ __ai __attribute__((target("neon"))) float32x4_t vpmaxq_f32(float32x4_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vpmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 
34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55787,16 +57445,16 @@ __ai __attribute__((target("neon"))) int32x4_t vpmaxq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vpmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vpmaxq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpmaxq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -55804,14 +57462,14 @@ __ai __attribute__((target("neon"))) int16x8_t vpmaxq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vpmaxqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpmaxqd_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpmaxqd_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vpmaxqd_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vpmaxqd_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpmaxqd_f64(__rev0)); return __ret; } #endif @@ -55819,14 +57477,14 @@ __ai __attribute__((target("neon"))) float64_t vpmaxqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vpmaxs_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmaxs_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpmaxs_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vpmaxs_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vpmaxs_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpmaxs_f32(__rev0)); return __ret; } #endif @@ -55834,16 +57492,16 @@ __ai __attribute__((target("neon"))) float32_t vpmaxs_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpmaxnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; 
} #else __ai __attribute__((target("neon"))) float64x2_t vpmaxnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpmaxnmq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -55851,16 +57509,16 @@ __ai __attribute__((target("neon"))) float64x2_t vpmaxnmq_f64(float64x2_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpmaxnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vpmaxnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vpmaxnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpmaxnmq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55868,16 +57526,16 @@ __ai __attribute__((target("neon"))) float32x4_t vpmaxnmq_f32(float32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpmaxnm_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vpmaxnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float32x2_t) __builtin_neon_vpmaxnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpmaxnm_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif 
@@ -55885,14 +57543,14 @@ __ai __attribute__((target("neon"))) float32x2_t vpmaxnm_f32(float32x2_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vpmaxnmqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpmaxnmqd_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vpmaxnmqd_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vpmaxnmqd_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpmaxnmqd_f64(__rev0)); return __ret; } #endif @@ -55900,14 +57558,14 @@ __ai __attribute__((target("neon"))) float64_t vpmaxnmqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vpmaxnms_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmaxnms_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpmaxnms_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vpmaxnms_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vpmaxnms_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpmaxnms_f32(__rev0)); return __ret; } #endif @@ -55915,16 +57573,16 @@ __ai __attribute__((target("neon"))) float32_t vpmaxnms_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vpminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -55932,16 +57590,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vpminq_u8(uint8x16_t __p0, uint8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 
50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vpminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -55949,16 +57607,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vpminq_u32(uint32x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vpminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -55966,16 +57624,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vpminq_u16(uint16x8_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vpminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, 
__rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -55983,16 +57641,16 @@ __ai __attribute__((target("neon"))) int8x16_t vpminq_s8(int8x16_t __p0, int8x16 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vpminq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -56000,16 +57658,16 @@ __ai __attribute__((target("neon"))) float64x2_t vpminq_f64(float64x2_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vpminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -56017,16 +57675,16 @@ __ai __attribute__((target("neon"))) float32x4_t vpminq_f32(float32x4_t __p0, fl #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vpminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -56034,16 +57692,16 @@ __ai __attribute__((target("neon"))) int32x4_t vpminq_s32(int32x4_t __p0, int32x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vpminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vpminq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vpminq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -56051,14 +57709,14 @@ __ai __attribute__((target("neon"))) int16x8_t vpminq_s16(int16x8_t __p0, int16x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vpminqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpminqd_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpminqd_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vpminqd_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vpminqd_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpminqd_f64(__rev0)); return __ret; } #endif @@ -56066,14 +57724,14 @@ __ai __attribute__((target("neon"))) float64_t vpminqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vpmins_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpmins_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpmins_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vpmins_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vpmins_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, 
__builtin_neon_vpmins_f32(__rev0)); return __ret; } #endif @@ -56081,16 +57739,16 @@ __ai __attribute__((target("neon"))) float32_t vpmins_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpminnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vpminnmq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vpminnmq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -56098,16 +57756,16 @@ __ai __attribute__((target("neon"))) float64x2_t vpminnmq_f64(float64x2_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpminnmq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vpminnmq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vpminnmq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vpminnmq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -56115,16 +57773,16 @@ __ai __attribute__((target("neon"))) float32x4_t vpminnmq_f32(float32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__p0, (int8x8_t)__p1, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpminnm_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vpminnm_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret 
= (float32x2_t) __builtin_neon_vpminnm_v((int8x8_t)__rev0, (int8x8_t)__rev1, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vpminnm_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -56132,14 +57790,14 @@ __ai __attribute__((target("neon"))) float32x2_t vpminnm_f32(float32x2_t __p0, f #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64_t vpminnmqd_f64(float64x2_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vpminnmqd_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpminnmqd_f64(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float64_t vpminnmqd_f64(float64x2_t __p0) { float64_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64_t) __builtin_neon_vpminnmqd_f64(__rev0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vpminnmqd_f64(__rev0)); return __ret; } #endif @@ -56147,14 +57805,14 @@ __ai __attribute__((target("neon"))) float64_t vpminnmqd_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32_t vpminnms_f32(float32x2_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vpminnms_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpminnms_f32(__p0)); return __ret; } #else __ai __attribute__((target("neon"))) float32_t vpminnms_f32(float32x2_t __p0) { float32_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32_t) __builtin_neon_vpminnms_f32(__rev0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vpminnms_f32(__rev0)); return __ret; } #endif @@ -56162,92 +57820,92 @@ __ai __attribute__((target("neon"))) float32_t vpminnms_f32(float32x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqabsq_s64(int64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqabsq_s64(int64x2_t __p0) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqabsq_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqabsq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vqabs_s64(int64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vqabs_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai __attribute__((target("neon"))) int8_t vqabsb_s8(int8_t __p0) { int8_t __ret; - __ret = 
(int8_t) __builtin_neon_vqabsb_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqabsb_s8(__p0)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqabss_s32(int32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqabss_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqabss_s32(__p0)); return __ret; } __ai __attribute__((target("neon"))) int64_t vqabsd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqabsd_s64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqabsd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqabsh_s16(int16_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqabsh_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqabsh_s16(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint8_t vqaddb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqaddb_u8(__p0, __p1); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqaddb_u8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vqadds_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqadds_u32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqadds_u32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vqaddd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqaddd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vqaddd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint16_t vqaddh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqaddh_u16(__p0, __p1); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqaddh_u16(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int8_t vqaddb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqaddb_s8(__p0, __p1); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqaddb_s8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqadds_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqadds_s32(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqadds_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vqaddd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqaddd_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqaddd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqaddh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqaddh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqaddh_s16(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vqdmlals_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmlals_s32(__p0, __p1, __p2); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlals_s32(__p0, __p1, __p2)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqdmlalh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlalh_s16(__p0, __p1, __p2)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -56259,11 +57917,11 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmlal_high_s32(int64x2_t __p0, 
#else __ai __attribute__((target("neon"))) int64x2_t vqdmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vqdmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -56277,108 +57935,108 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_s16(int32x4_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vqdmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_lane_s32(__p0_628, __p1_628, __p2_628, __p3_628) __extension__ ({ \ - int64x2_t __ret_628; \ - int64x2_t __s0_628 = __p0_628; \ - int32x4_t __s1_628 = __p1_628; \ - int32x2_t __s2_628 = __p2_628; \ - __ret_628 = vqdmlal_s32(__s0_628, vget_high_s32(__s1_628), splat_lane_s32(__s2_628, __p3_628)); \ - __ret_628; \ +#define vqdmlal_high_lane_s32(__p0_644, __p1_644, __p2_644, __p3_644) __extension__ ({ \ + int64x2_t __ret_644; \ + int64x2_t __s0_644 = __p0_644; \ + int32x4_t __s1_644 = __p1_644; \ + int32x2_t __s2_644 = __p2_644; \ + __ret_644 = vqdmlal_s32(__s0_644, vget_high_s32(__s1_644), splat_lane_s32(__s2_644, __p3_644)); \ + __ret_644; \ }) #else -#define vqdmlal_high_lane_s32(__p0_629, __p1_629, __p2_629, __p3_629) __extension__ ({ \ - int64x2_t __ret_629; \ - int64x2_t __s0_629 = __p0_629; \ - int32x4_t __s1_629 = __p1_629; \ - int32x2_t __s2_629 = __p2_629; \ - int64x2_t __rev0_629; __rev0_629 = __builtin_shufflevector(__s0_629, __s0_629, 1, 0); \ - int32x4_t __rev1_629; __rev1_629 = __builtin_shufflevector(__s1_629, __s1_629, 3, 2, 1, 0); \ - int32x2_t __rev2_629; __rev2_629 = __builtin_shufflevector(__s2_629, __s2_629, 1, 0); \ - __ret_629 = __noswap_vqdmlal_s32(__rev0_629, __noswap_vget_high_s32(__rev1_629), __noswap_splat_lane_s32(__rev2_629, __p3_629)); \ - __ret_629 = __builtin_shufflevector(__ret_629, __ret_629, 1, 0); \ - __ret_629; \ +#define vqdmlal_high_lane_s32(__p0_645, __p1_645, __p2_645, __p3_645) __extension__ ({ \ + int64x2_t __ret_645; \ + int64x2_t __s0_645 = __p0_645; \ + int32x4_t __s1_645 = __p1_645; \ + int32x2_t __s2_645 = __p2_645; \ + 
int64x2_t __rev0_645; __rev0_645 = __builtin_shufflevector(__s0_645, __s0_645, __lane_reverse_128_64); \ + int32x4_t __rev1_645; __rev1_645 = __builtin_shufflevector(__s1_645, __s1_645, __lane_reverse_128_32); \ + int32x2_t __rev2_645; __rev2_645 = __builtin_shufflevector(__s2_645, __s2_645, __lane_reverse_64_32); \ + __ret_645 = __noswap_vqdmlal_s32(__rev0_645, __noswap_vget_high_s32(__rev1_645), __noswap_splat_lane_s32(__rev2_645, __p3_645)); \ + __ret_645 = __builtin_shufflevector(__ret_645, __ret_645, __lane_reverse_128_64); \ + __ret_645; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_lane_s16(__p0_630, __p1_630, __p2_630, __p3_630) __extension__ ({ \ - int32x4_t __ret_630; \ - int32x4_t __s0_630 = __p0_630; \ - int16x8_t __s1_630 = __p1_630; \ - int16x4_t __s2_630 = __p2_630; \ - __ret_630 = vqdmlal_s16(__s0_630, vget_high_s16(__s1_630), splat_lane_s16(__s2_630, __p3_630)); \ - __ret_630; \ +#define vqdmlal_high_lane_s16(__p0_646, __p1_646, __p2_646, __p3_646) __extension__ ({ \ + int32x4_t __ret_646; \ + int32x4_t __s0_646 = __p0_646; \ + int16x8_t __s1_646 = __p1_646; \ + int16x4_t __s2_646 = __p2_646; \ + __ret_646 = vqdmlal_s16(__s0_646, vget_high_s16(__s1_646), splat_lane_s16(__s2_646, __p3_646)); \ + __ret_646; \ }) #else -#define vqdmlal_high_lane_s16(__p0_631, __p1_631, __p2_631, __p3_631) __extension__ ({ \ - int32x4_t __ret_631; \ - int32x4_t __s0_631 = __p0_631; \ - int16x8_t __s1_631 = __p1_631; \ - int16x4_t __s2_631 = __p2_631; \ - int32x4_t __rev0_631; __rev0_631 = __builtin_shufflevector(__s0_631, __s0_631, 3, 2, 1, 0); \ - int16x8_t __rev1_631; __rev1_631 = __builtin_shufflevector(__s1_631, __s1_631, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_631; __rev2_631 = __builtin_shufflevector(__s2_631, __s2_631, 3, 2, 1, 0); \ - __ret_631 = __noswap_vqdmlal_s16(__rev0_631, __noswap_vget_high_s16(__rev1_631), __noswap_splat_lane_s16(__rev2_631, __p3_631)); \ - __ret_631 = __builtin_shufflevector(__ret_631, __ret_631, 3, 2, 1, 0); \ - __ret_631; \ +#define vqdmlal_high_lane_s16(__p0_647, __p1_647, __p2_647, __p3_647) __extension__ ({ \ + int32x4_t __ret_647; \ + int32x4_t __s0_647 = __p0_647; \ + int16x8_t __s1_647 = __p1_647; \ + int16x4_t __s2_647 = __p2_647; \ + int32x4_t __rev0_647; __rev0_647 = __builtin_shufflevector(__s0_647, __s0_647, __lane_reverse_128_32); \ + int16x8_t __rev1_647; __rev1_647 = __builtin_shufflevector(__s1_647, __s1_647, __lane_reverse_128_16); \ + int16x4_t __rev2_647; __rev2_647 = __builtin_shufflevector(__s2_647, __s2_647, __lane_reverse_64_16); \ + __ret_647 = __noswap_vqdmlal_s16(__rev0_647, __noswap_vget_high_s16(__rev1_647), __noswap_splat_lane_s16(__rev2_647, __p3_647)); \ + __ret_647 = __builtin_shufflevector(__ret_647, __ret_647, __lane_reverse_128_32); \ + __ret_647; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_laneq_s32(__p0_632, __p1_632, __p2_632, __p3_632) __extension__ ({ \ - int64x2_t __ret_632; \ - int64x2_t __s0_632 = __p0_632; \ - int32x4_t __s1_632 = __p1_632; \ - int32x4_t __s2_632 = __p2_632; \ - __ret_632 = vqdmlal_s32(__s0_632, vget_high_s32(__s1_632), splat_laneq_s32(__s2_632, __p3_632)); \ - __ret_632; \ +#define vqdmlal_high_laneq_s32(__p0_648, __p1_648, __p2_648, __p3_648) __extension__ ({ \ + int64x2_t __ret_648; \ + int64x2_t __s0_648 = __p0_648; \ + int32x4_t __s1_648 = __p1_648; \ + int32x4_t __s2_648 = __p2_648; \ + __ret_648 = vqdmlal_s32(__s0_648, vget_high_s32(__s1_648), splat_laneq_s32(__s2_648, __p3_648)); \ + __ret_648; \ }) #else -#define vqdmlal_high_laneq_s32(__p0_633, __p1_633, 
__p2_633, __p3_633) __extension__ ({ \ - int64x2_t __ret_633; \ - int64x2_t __s0_633 = __p0_633; \ - int32x4_t __s1_633 = __p1_633; \ - int32x4_t __s2_633 = __p2_633; \ - int64x2_t __rev0_633; __rev0_633 = __builtin_shufflevector(__s0_633, __s0_633, 1, 0); \ - int32x4_t __rev1_633; __rev1_633 = __builtin_shufflevector(__s1_633, __s1_633, 3, 2, 1, 0); \ - int32x4_t __rev2_633; __rev2_633 = __builtin_shufflevector(__s2_633, __s2_633, 3, 2, 1, 0); \ - __ret_633 = __noswap_vqdmlal_s32(__rev0_633, __noswap_vget_high_s32(__rev1_633), __noswap_splat_laneq_s32(__rev2_633, __p3_633)); \ - __ret_633 = __builtin_shufflevector(__ret_633, __ret_633, 1, 0); \ - __ret_633; \ +#define vqdmlal_high_laneq_s32(__p0_649, __p1_649, __p2_649, __p3_649) __extension__ ({ \ + int64x2_t __ret_649; \ + int64x2_t __s0_649 = __p0_649; \ + int32x4_t __s1_649 = __p1_649; \ + int32x4_t __s2_649 = __p2_649; \ + int64x2_t __rev0_649; __rev0_649 = __builtin_shufflevector(__s0_649, __s0_649, __lane_reverse_128_64); \ + int32x4_t __rev1_649; __rev1_649 = __builtin_shufflevector(__s1_649, __s1_649, __lane_reverse_128_32); \ + int32x4_t __rev2_649; __rev2_649 = __builtin_shufflevector(__s2_649, __s2_649, __lane_reverse_128_32); \ + __ret_649 = __noswap_vqdmlal_s32(__rev0_649, __noswap_vget_high_s32(__rev1_649), __noswap_splat_laneq_s32(__rev2_649, __p3_649)); \ + __ret_649 = __builtin_shufflevector(__ret_649, __ret_649, __lane_reverse_128_64); \ + __ret_649; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_laneq_s16(__p0_634, __p1_634, __p2_634, __p3_634) __extension__ ({ \ - int32x4_t __ret_634; \ - int32x4_t __s0_634 = __p0_634; \ - int16x8_t __s1_634 = __p1_634; \ - int16x8_t __s2_634 = __p2_634; \ - __ret_634 = vqdmlal_s16(__s0_634, vget_high_s16(__s1_634), splat_laneq_s16(__s2_634, __p3_634)); \ - __ret_634; \ +#define vqdmlal_high_laneq_s16(__p0_650, __p1_650, __p2_650, __p3_650) __extension__ ({ \ + int32x4_t __ret_650; \ + int32x4_t __s0_650 = __p0_650; \ + int16x8_t __s1_650 = __p1_650; \ + int16x8_t __s2_650 = __p2_650; \ + __ret_650 = vqdmlal_s16(__s0_650, vget_high_s16(__s1_650), splat_laneq_s16(__s2_650, __p3_650)); \ + __ret_650; \ }) #else -#define vqdmlal_high_laneq_s16(__p0_635, __p1_635, __p2_635, __p3_635) __extension__ ({ \ - int32x4_t __ret_635; \ - int32x4_t __s0_635 = __p0_635; \ - int16x8_t __s1_635 = __p1_635; \ - int16x8_t __s2_635 = __p2_635; \ - int32x4_t __rev0_635; __rev0_635 = __builtin_shufflevector(__s0_635, __s0_635, 3, 2, 1, 0); \ - int16x8_t __rev1_635; __rev1_635 = __builtin_shufflevector(__s1_635, __s1_635, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_635; __rev2_635 = __builtin_shufflevector(__s2_635, __s2_635, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_635 = __noswap_vqdmlal_s16(__rev0_635, __noswap_vget_high_s16(__rev1_635), __noswap_splat_laneq_s16(__rev2_635, __p3_635)); \ - __ret_635 = __builtin_shufflevector(__ret_635, __ret_635, 3, 2, 1, 0); \ - __ret_635; \ +#define vqdmlal_high_laneq_s16(__p0_651, __p1_651, __p2_651, __p3_651) __extension__ ({ \ + int32x4_t __ret_651; \ + int32x4_t __s0_651 = __p0_651; \ + int16x8_t __s1_651 = __p1_651; \ + int16x8_t __s2_651 = __p2_651; \ + int32x4_t __rev0_651; __rev0_651 = __builtin_shufflevector(__s0_651, __s0_651, __lane_reverse_128_32); \ + int16x8_t __rev1_651; __rev1_651 = __builtin_shufflevector(__s1_651, __s1_651, __lane_reverse_128_16); \ + int16x8_t __rev2_651; __rev2_651 = __builtin_shufflevector(__s2_651, __s2_651, __lane_reverse_128_16); \ + __ret_651 = __noswap_vqdmlal_s16(__rev0_651, __noswap_vget_high_s16(__rev1_651), 
__noswap_splat_laneq_s16(__rev2_651, __p3_651)); \ + __ret_651 = __builtin_shufflevector(__ret_651, __ret_651, __lane_reverse_128_32); \ + __ret_651; \ }) #endif @@ -56391,10 +58049,10 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmlal_high_n_s32(int64x2_t __p0 #else __ai __attribute__((target("neon"))) int64x2_t vqdmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vqdmlal_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -56408,10 +58066,10 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 #else __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vqdmlal_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -56422,7 +58080,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ - __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56431,8 +58089,8 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = (int64_t) __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __rev2, __p3); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlals_lane_s32(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif @@ -56443,7 +58101,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ - __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56452,8 +58110,8 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (int32_t) __builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __rev2, __p3); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int32_t, 
__builtin_neon_vqdmlalh_lane_s16(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif @@ -56464,7 +58122,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ - __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56473,8 +58131,8 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (int64_t) __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __rev2, __p3); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlals_laneq_s32(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif @@ -56485,7 +58143,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ - __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56494,68 +58152,68 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlal_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int32_t) __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __rev2, __p3); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlalh_laneq_s16(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_laneq_s32(__p0_636, __p1_636, __p2_636, __p3_636) __extension__ ({ \ - int64x2_t __ret_636; \ - int64x2_t __s0_636 = __p0_636; \ - int32x2_t __s1_636 = __p1_636; \ - int32x4_t __s2_636 = __p2_636; \ - __ret_636 = vqdmlal_s32(__s0_636, __s1_636, splat_laneq_s32(__s2_636, __p3_636)); \ - __ret_636; \ +#define vqdmlal_laneq_s32(__p0_652, __p1_652, __p2_652, __p3_652) __extension__ ({ \ + int64x2_t __ret_652; \ + int64x2_t __s0_652 = __p0_652; \ + int32x2_t __s1_652 = __p1_652; \ + int32x4_t __s2_652 = __p2_652; \ + __ret_652 = vqdmlal_s32(__s0_652, __s1_652, splat_laneq_s32(__s2_652, __p3_652)); \ + __ret_652; \ }) #else -#define vqdmlal_laneq_s32(__p0_637, __p1_637, __p2_637, __p3_637) __extension__ ({ \ - int64x2_t __ret_637; \ - int64x2_t __s0_637 = __p0_637; \ - int32x2_t __s1_637 = __p1_637; \ - int32x4_t __s2_637 = __p2_637; \ - int64x2_t __rev0_637; __rev0_637 = __builtin_shufflevector(__s0_637, __s0_637, 1, 0); \ - int32x2_t __rev1_637; __rev1_637 = __builtin_shufflevector(__s1_637, __s1_637, 1, 0); \ - int32x4_t __rev2_637; __rev2_637 = __builtin_shufflevector(__s2_637, __s2_637, 3, 2, 1, 0); \ - __ret_637 = __noswap_vqdmlal_s32(__rev0_637, __rev1_637, __noswap_splat_laneq_s32(__rev2_637, __p3_637)); \ - __ret_637 = __builtin_shufflevector(__ret_637, __ret_637, 1, 0); \ - __ret_637; \ +#define vqdmlal_laneq_s32(__p0_653, __p1_653, __p2_653, __p3_653) __extension__ ({ \ + int64x2_t __ret_653; \ + int64x2_t __s0_653 = __p0_653; \ + int32x2_t __s1_653 = __p1_653; \ + int32x4_t __s2_653 = __p2_653; \ + int64x2_t __rev0_653; __rev0_653 = 
__builtin_shufflevector(__s0_653, __s0_653, __lane_reverse_128_64); \ + int32x2_t __rev1_653; __rev1_653 = __builtin_shufflevector(__s1_653, __s1_653, __lane_reverse_64_32); \ + int32x4_t __rev2_653; __rev2_653 = __builtin_shufflevector(__s2_653, __s2_653, __lane_reverse_128_32); \ + __ret_653 = __noswap_vqdmlal_s32(__rev0_653, __rev1_653, __noswap_splat_laneq_s32(__rev2_653, __p3_653)); \ + __ret_653 = __builtin_shufflevector(__ret_653, __ret_653, __lane_reverse_128_64); \ + __ret_653; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_laneq_s16(__p0_638, __p1_638, __p2_638, __p3_638) __extension__ ({ \ - int32x4_t __ret_638; \ - int32x4_t __s0_638 = __p0_638; \ - int16x4_t __s1_638 = __p1_638; \ - int16x8_t __s2_638 = __p2_638; \ - __ret_638 = vqdmlal_s16(__s0_638, __s1_638, splat_laneq_s16(__s2_638, __p3_638)); \ - __ret_638; \ +#define vqdmlal_laneq_s16(__p0_654, __p1_654, __p2_654, __p3_654) __extension__ ({ \ + int32x4_t __ret_654; \ + int32x4_t __s0_654 = __p0_654; \ + int16x4_t __s1_654 = __p1_654; \ + int16x8_t __s2_654 = __p2_654; \ + __ret_654 = vqdmlal_s16(__s0_654, __s1_654, splat_laneq_s16(__s2_654, __p3_654)); \ + __ret_654; \ }) #else -#define vqdmlal_laneq_s16(__p0_639, __p1_639, __p2_639, __p3_639) __extension__ ({ \ - int32x4_t __ret_639; \ - int32x4_t __s0_639 = __p0_639; \ - int16x4_t __s1_639 = __p1_639; \ - int16x8_t __s2_639 = __p2_639; \ - int32x4_t __rev0_639; __rev0_639 = __builtin_shufflevector(__s0_639, __s0_639, 3, 2, 1, 0); \ - int16x4_t __rev1_639; __rev1_639 = __builtin_shufflevector(__s1_639, __s1_639, 3, 2, 1, 0); \ - int16x8_t __rev2_639; __rev2_639 = __builtin_shufflevector(__s2_639, __s2_639, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_639 = __noswap_vqdmlal_s16(__rev0_639, __rev1_639, __noswap_splat_laneq_s16(__rev2_639, __p3_639)); \ - __ret_639 = __builtin_shufflevector(__ret_639, __ret_639, 3, 2, 1, 0); \ - __ret_639; \ +#define vqdmlal_laneq_s16(__p0_655, __p1_655, __p2_655, __p3_655) __extension__ ({ \ + int32x4_t __ret_655; \ + int32x4_t __s0_655 = __p0_655; \ + int16x4_t __s1_655 = __p1_655; \ + int16x8_t __s2_655 = __p2_655; \ + int32x4_t __rev0_655; __rev0_655 = __builtin_shufflevector(__s0_655, __s0_655, __lane_reverse_128_32); \ + int16x4_t __rev1_655; __rev1_655 = __builtin_shufflevector(__s1_655, __s1_655, __lane_reverse_64_16); \ + int16x8_t __rev2_655; __rev2_655 = __builtin_shufflevector(__s2_655, __s2_655, __lane_reverse_128_16); \ + __ret_655 = __noswap_vqdmlal_s16(__rev0_655, __rev1_655, __noswap_splat_laneq_s16(__rev2_655, __p3_655)); \ + __ret_655 = __builtin_shufflevector(__ret_655, __ret_655, __lane_reverse_128_32); \ + __ret_655; \ }) #endif __ai __attribute__((target("neon"))) int64_t vqdmlsls_s32(int64_t __p0, int32_t __p1, int32_t __p2) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlsls_s32(__p0, __p1, __p2)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqdmlslh_s16(int32_t __p0, int16_t __p1, int16_t __p2) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlslh_s16(__p0, __p1, __p2)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -56567,11 +58225,11 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmlsl_high_s32(int64x2_t __p0, #else __ai __attribute__((target("neon"))) int64x2_t vqdmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 
0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vqdmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -56585,108 +58243,108 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_s16(int32x4_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vqdmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_lane_s32(__p0_640, __p1_640, __p2_640, __p3_640) __extension__ ({ \ - int64x2_t __ret_640; \ - int64x2_t __s0_640 = __p0_640; \ - int32x4_t __s1_640 = __p1_640; \ - int32x2_t __s2_640 = __p2_640; \ - __ret_640 = vqdmlsl_s32(__s0_640, vget_high_s32(__s1_640), splat_lane_s32(__s2_640, __p3_640)); \ - __ret_640; \ +#define vqdmlsl_high_lane_s32(__p0_656, __p1_656, __p2_656, __p3_656) __extension__ ({ \ + int64x2_t __ret_656; \ + int64x2_t __s0_656 = __p0_656; \ + int32x4_t __s1_656 = __p1_656; \ + int32x2_t __s2_656 = __p2_656; \ + __ret_656 = vqdmlsl_s32(__s0_656, vget_high_s32(__s1_656), splat_lane_s32(__s2_656, __p3_656)); \ + __ret_656; \ }) #else -#define vqdmlsl_high_lane_s32(__p0_641, __p1_641, __p2_641, __p3_641) __extension__ ({ \ - int64x2_t __ret_641; \ - int64x2_t __s0_641 = __p0_641; \ - int32x4_t __s1_641 = __p1_641; \ - int32x2_t __s2_641 = __p2_641; \ - int64x2_t __rev0_641; __rev0_641 = __builtin_shufflevector(__s0_641, __s0_641, 1, 0); \ - int32x4_t __rev1_641; __rev1_641 = __builtin_shufflevector(__s1_641, __s1_641, 3, 2, 1, 0); \ - int32x2_t __rev2_641; __rev2_641 = __builtin_shufflevector(__s2_641, __s2_641, 1, 0); \ - __ret_641 = __noswap_vqdmlsl_s32(__rev0_641, __noswap_vget_high_s32(__rev1_641), __noswap_splat_lane_s32(__rev2_641, __p3_641)); \ - __ret_641 = __builtin_shufflevector(__ret_641, __ret_641, 1, 0); \ - __ret_641; \ +#define vqdmlsl_high_lane_s32(__p0_657, __p1_657, __p2_657, __p3_657) __extension__ ({ \ + int64x2_t __ret_657; \ + int64x2_t __s0_657 = __p0_657; \ + int32x4_t __s1_657 = __p1_657; \ + int32x2_t __s2_657 = __p2_657; \ + int64x2_t __rev0_657; __rev0_657 = __builtin_shufflevector(__s0_657, __s0_657, __lane_reverse_128_64); \ + int32x4_t __rev1_657; __rev1_657 = __builtin_shufflevector(__s1_657, __s1_657, 
__lane_reverse_128_32); \ + int32x2_t __rev2_657; __rev2_657 = __builtin_shufflevector(__s2_657, __s2_657, __lane_reverse_64_32); \ + __ret_657 = __noswap_vqdmlsl_s32(__rev0_657, __noswap_vget_high_s32(__rev1_657), __noswap_splat_lane_s32(__rev2_657, __p3_657)); \ + __ret_657 = __builtin_shufflevector(__ret_657, __ret_657, __lane_reverse_128_64); \ + __ret_657; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_lane_s16(__p0_642, __p1_642, __p2_642, __p3_642) __extension__ ({ \ - int32x4_t __ret_642; \ - int32x4_t __s0_642 = __p0_642; \ - int16x8_t __s1_642 = __p1_642; \ - int16x4_t __s2_642 = __p2_642; \ - __ret_642 = vqdmlsl_s16(__s0_642, vget_high_s16(__s1_642), splat_lane_s16(__s2_642, __p3_642)); \ - __ret_642; \ +#define vqdmlsl_high_lane_s16(__p0_658, __p1_658, __p2_658, __p3_658) __extension__ ({ \ + int32x4_t __ret_658; \ + int32x4_t __s0_658 = __p0_658; \ + int16x8_t __s1_658 = __p1_658; \ + int16x4_t __s2_658 = __p2_658; \ + __ret_658 = vqdmlsl_s16(__s0_658, vget_high_s16(__s1_658), splat_lane_s16(__s2_658, __p3_658)); \ + __ret_658; \ }) #else -#define vqdmlsl_high_lane_s16(__p0_643, __p1_643, __p2_643, __p3_643) __extension__ ({ \ - int32x4_t __ret_643; \ - int32x4_t __s0_643 = __p0_643; \ - int16x8_t __s1_643 = __p1_643; \ - int16x4_t __s2_643 = __p2_643; \ - int32x4_t __rev0_643; __rev0_643 = __builtin_shufflevector(__s0_643, __s0_643, 3, 2, 1, 0); \ - int16x8_t __rev1_643; __rev1_643 = __builtin_shufflevector(__s1_643, __s1_643, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_643; __rev2_643 = __builtin_shufflevector(__s2_643, __s2_643, 3, 2, 1, 0); \ - __ret_643 = __noswap_vqdmlsl_s16(__rev0_643, __noswap_vget_high_s16(__rev1_643), __noswap_splat_lane_s16(__rev2_643, __p3_643)); \ - __ret_643 = __builtin_shufflevector(__ret_643, __ret_643, 3, 2, 1, 0); \ - __ret_643; \ +#define vqdmlsl_high_lane_s16(__p0_659, __p1_659, __p2_659, __p3_659) __extension__ ({ \ + int32x4_t __ret_659; \ + int32x4_t __s0_659 = __p0_659; \ + int16x8_t __s1_659 = __p1_659; \ + int16x4_t __s2_659 = __p2_659; \ + int32x4_t __rev0_659; __rev0_659 = __builtin_shufflevector(__s0_659, __s0_659, __lane_reverse_128_32); \ + int16x8_t __rev1_659; __rev1_659 = __builtin_shufflevector(__s1_659, __s1_659, __lane_reverse_128_16); \ + int16x4_t __rev2_659; __rev2_659 = __builtin_shufflevector(__s2_659, __s2_659, __lane_reverse_64_16); \ + __ret_659 = __noswap_vqdmlsl_s16(__rev0_659, __noswap_vget_high_s16(__rev1_659), __noswap_splat_lane_s16(__rev2_659, __p3_659)); \ + __ret_659 = __builtin_shufflevector(__ret_659, __ret_659, __lane_reverse_128_32); \ + __ret_659; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_laneq_s32(__p0_644, __p1_644, __p2_644, __p3_644) __extension__ ({ \ - int64x2_t __ret_644; \ - int64x2_t __s0_644 = __p0_644; \ - int32x4_t __s1_644 = __p1_644; \ - int32x4_t __s2_644 = __p2_644; \ - __ret_644 = vqdmlsl_s32(__s0_644, vget_high_s32(__s1_644), splat_laneq_s32(__s2_644, __p3_644)); \ - __ret_644; \ +#define vqdmlsl_high_laneq_s32(__p0_660, __p1_660, __p2_660, __p3_660) __extension__ ({ \ + int64x2_t __ret_660; \ + int64x2_t __s0_660 = __p0_660; \ + int32x4_t __s1_660 = __p1_660; \ + int32x4_t __s2_660 = __p2_660; \ + __ret_660 = vqdmlsl_s32(__s0_660, vget_high_s32(__s1_660), splat_laneq_s32(__s2_660, __p3_660)); \ + __ret_660; \ }) #else -#define vqdmlsl_high_laneq_s32(__p0_645, __p1_645, __p2_645, __p3_645) __extension__ ({ \ - int64x2_t __ret_645; \ - int64x2_t __s0_645 = __p0_645; \ - int32x4_t __s1_645 = __p1_645; \ - int32x4_t __s2_645 = __p2_645; \ - int64x2_t 
__rev0_645; __rev0_645 = __builtin_shufflevector(__s0_645, __s0_645, 1, 0); \ - int32x4_t __rev1_645; __rev1_645 = __builtin_shufflevector(__s1_645, __s1_645, 3, 2, 1, 0); \ - int32x4_t __rev2_645; __rev2_645 = __builtin_shufflevector(__s2_645, __s2_645, 3, 2, 1, 0); \ - __ret_645 = __noswap_vqdmlsl_s32(__rev0_645, __noswap_vget_high_s32(__rev1_645), __noswap_splat_laneq_s32(__rev2_645, __p3_645)); \ - __ret_645 = __builtin_shufflevector(__ret_645, __ret_645, 1, 0); \ - __ret_645; \ +#define vqdmlsl_high_laneq_s32(__p0_661, __p1_661, __p2_661, __p3_661) __extension__ ({ \ + int64x2_t __ret_661; \ + int64x2_t __s0_661 = __p0_661; \ + int32x4_t __s1_661 = __p1_661; \ + int32x4_t __s2_661 = __p2_661; \ + int64x2_t __rev0_661; __rev0_661 = __builtin_shufflevector(__s0_661, __s0_661, __lane_reverse_128_64); \ + int32x4_t __rev1_661; __rev1_661 = __builtin_shufflevector(__s1_661, __s1_661, __lane_reverse_128_32); \ + int32x4_t __rev2_661; __rev2_661 = __builtin_shufflevector(__s2_661, __s2_661, __lane_reverse_128_32); \ + __ret_661 = __noswap_vqdmlsl_s32(__rev0_661, __noswap_vget_high_s32(__rev1_661), __noswap_splat_laneq_s32(__rev2_661, __p3_661)); \ + __ret_661 = __builtin_shufflevector(__ret_661, __ret_661, __lane_reverse_128_64); \ + __ret_661; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_laneq_s16(__p0_646, __p1_646, __p2_646, __p3_646) __extension__ ({ \ - int32x4_t __ret_646; \ - int32x4_t __s0_646 = __p0_646; \ - int16x8_t __s1_646 = __p1_646; \ - int16x8_t __s2_646 = __p2_646; \ - __ret_646 = vqdmlsl_s16(__s0_646, vget_high_s16(__s1_646), splat_laneq_s16(__s2_646, __p3_646)); \ - __ret_646; \ +#define vqdmlsl_high_laneq_s16(__p0_662, __p1_662, __p2_662, __p3_662) __extension__ ({ \ + int32x4_t __ret_662; \ + int32x4_t __s0_662 = __p0_662; \ + int16x8_t __s1_662 = __p1_662; \ + int16x8_t __s2_662 = __p2_662; \ + __ret_662 = vqdmlsl_s16(__s0_662, vget_high_s16(__s1_662), splat_laneq_s16(__s2_662, __p3_662)); \ + __ret_662; \ }) #else -#define vqdmlsl_high_laneq_s16(__p0_647, __p1_647, __p2_647, __p3_647) __extension__ ({ \ - int32x4_t __ret_647; \ - int32x4_t __s0_647 = __p0_647; \ - int16x8_t __s1_647 = __p1_647; \ - int16x8_t __s2_647 = __p2_647; \ - int32x4_t __rev0_647; __rev0_647 = __builtin_shufflevector(__s0_647, __s0_647, 3, 2, 1, 0); \ - int16x8_t __rev1_647; __rev1_647 = __builtin_shufflevector(__s1_647, __s1_647, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_647; __rev2_647 = __builtin_shufflevector(__s2_647, __s2_647, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_647 = __noswap_vqdmlsl_s16(__rev0_647, __noswap_vget_high_s16(__rev1_647), __noswap_splat_laneq_s16(__rev2_647, __p3_647)); \ - __ret_647 = __builtin_shufflevector(__ret_647, __ret_647, 3, 2, 1, 0); \ - __ret_647; \ +#define vqdmlsl_high_laneq_s16(__p0_663, __p1_663, __p2_663, __p3_663) __extension__ ({ \ + int32x4_t __ret_663; \ + int32x4_t __s0_663 = __p0_663; \ + int16x8_t __s1_663 = __p1_663; \ + int16x8_t __s2_663 = __p2_663; \ + int32x4_t __rev0_663; __rev0_663 = __builtin_shufflevector(__s0_663, __s0_663, __lane_reverse_128_32); \ + int16x8_t __rev1_663; __rev1_663 = __builtin_shufflevector(__s1_663, __s1_663, __lane_reverse_128_16); \ + int16x8_t __rev2_663; __rev2_663 = __builtin_shufflevector(__s2_663, __s2_663, __lane_reverse_128_16); \ + __ret_663 = __noswap_vqdmlsl_s16(__rev0_663, __noswap_vget_high_s16(__rev1_663), __noswap_splat_laneq_s16(__rev2_663, __p3_663)); \ + __ret_663 = __builtin_shufflevector(__ret_663, __ret_663, __lane_reverse_128_32); \ + __ret_663; \ }) #endif @@ -56699,10 +58357,10 @@ 
__ai __attribute__((target("neon"))) int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0 #else __ai __attribute__((target("neon"))) int64x2_t vqdmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vqdmlsl_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -56716,10 +58374,10 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 #else __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vqdmlsl_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -56730,7 +58388,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56739,8 +58397,8 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x2_t __s2 = __p2; \ - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 1, 0); \ - __ret = (int64_t) __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __rev2, __p3); \ + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlsls_lane_s32(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif @@ -56751,7 +58409,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56760,8 +58418,8 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x4_t __s2 = __p2; \ - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (int32_t) __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __rev2, __p3); \ + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlslh_lane_s16(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif @@ -56772,7 +58430,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int64_t __s0 = 
__p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ - __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56781,8 +58439,8 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int64_t __s0 = __p0; \ int32_t __s1 = __p1; \ int32x4_t __s2 = __p2; \ - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (int64_t) __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __rev2, __p3); \ + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmlsls_laneq_s32(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif @@ -56793,7 +58451,7 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ - __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __s2, __p3); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __s2, __p3)); \ __ret; \ }) #else @@ -56802,68 +58460,68 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0 int32_t __s0 = __p0; \ int16_t __s1 = __p1; \ int16x8_t __s2 = __p2; \ - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int32_t) __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __rev2, __p3); \ + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmlslh_laneq_s16(__s0, __s1, __rev2, __p3)); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_laneq_s32(__p0_648, __p1_648, __p2_648, __p3_648) __extension__ ({ \ - int64x2_t __ret_648; \ - int64x2_t __s0_648 = __p0_648; \ - int32x2_t __s1_648 = __p1_648; \ - int32x4_t __s2_648 = __p2_648; \ - __ret_648 = vqdmlsl_s32(__s0_648, __s1_648, splat_laneq_s32(__s2_648, __p3_648)); \ - __ret_648; \ +#define vqdmlsl_laneq_s32(__p0_664, __p1_664, __p2_664, __p3_664) __extension__ ({ \ + int64x2_t __ret_664; \ + int64x2_t __s0_664 = __p0_664; \ + int32x2_t __s1_664 = __p1_664; \ + int32x4_t __s2_664 = __p2_664; \ + __ret_664 = vqdmlsl_s32(__s0_664, __s1_664, splat_laneq_s32(__s2_664, __p3_664)); \ + __ret_664; \ }) #else -#define vqdmlsl_laneq_s32(__p0_649, __p1_649, __p2_649, __p3_649) __extension__ ({ \ - int64x2_t __ret_649; \ - int64x2_t __s0_649 = __p0_649; \ - int32x2_t __s1_649 = __p1_649; \ - int32x4_t __s2_649 = __p2_649; \ - int64x2_t __rev0_649; __rev0_649 = __builtin_shufflevector(__s0_649, __s0_649, 1, 0); \ - int32x2_t __rev1_649; __rev1_649 = __builtin_shufflevector(__s1_649, __s1_649, 1, 0); \ - int32x4_t __rev2_649; __rev2_649 = __builtin_shufflevector(__s2_649, __s2_649, 3, 2, 1, 0); \ - __ret_649 = __noswap_vqdmlsl_s32(__rev0_649, __rev1_649, __noswap_splat_laneq_s32(__rev2_649, __p3_649)); \ - __ret_649 = __builtin_shufflevector(__ret_649, __ret_649, 1, 0); \ - __ret_649; \ +#define vqdmlsl_laneq_s32(__p0_665, __p1_665, __p2_665, __p3_665) __extension__ ({ \ + int64x2_t __ret_665; \ + int64x2_t __s0_665 = __p0_665; \ + int32x2_t __s1_665 = __p1_665; \ + int32x4_t __s2_665 = __p2_665; \ + int64x2_t __rev0_665; __rev0_665 = __builtin_shufflevector(__s0_665, __s0_665, __lane_reverse_128_64); \ + int32x2_t __rev1_665; __rev1_665 = __builtin_shufflevector(__s1_665, __s1_665, __lane_reverse_64_32); \ + int32x4_t __rev2_665; 
__rev2_665 = __builtin_shufflevector(__s2_665, __s2_665, __lane_reverse_128_32); \ + __ret_665 = __noswap_vqdmlsl_s32(__rev0_665, __rev1_665, __noswap_splat_laneq_s32(__rev2_665, __p3_665)); \ + __ret_665 = __builtin_shufflevector(__ret_665, __ret_665, __lane_reverse_128_64); \ + __ret_665; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_laneq_s16(__p0_650, __p1_650, __p2_650, __p3_650) __extension__ ({ \ - int32x4_t __ret_650; \ - int32x4_t __s0_650 = __p0_650; \ - int16x4_t __s1_650 = __p1_650; \ - int16x8_t __s2_650 = __p2_650; \ - __ret_650 = vqdmlsl_s16(__s0_650, __s1_650, splat_laneq_s16(__s2_650, __p3_650)); \ - __ret_650; \ +#define vqdmlsl_laneq_s16(__p0_666, __p1_666, __p2_666, __p3_666) __extension__ ({ \ + int32x4_t __ret_666; \ + int32x4_t __s0_666 = __p0_666; \ + int16x4_t __s1_666 = __p1_666; \ + int16x8_t __s2_666 = __p2_666; \ + __ret_666 = vqdmlsl_s16(__s0_666, __s1_666, splat_laneq_s16(__s2_666, __p3_666)); \ + __ret_666; \ }) #else -#define vqdmlsl_laneq_s16(__p0_651, __p1_651, __p2_651, __p3_651) __extension__ ({ \ - int32x4_t __ret_651; \ - int32x4_t __s0_651 = __p0_651; \ - int16x4_t __s1_651 = __p1_651; \ - int16x8_t __s2_651 = __p2_651; \ - int32x4_t __rev0_651; __rev0_651 = __builtin_shufflevector(__s0_651, __s0_651, 3, 2, 1, 0); \ - int16x4_t __rev1_651; __rev1_651 = __builtin_shufflevector(__s1_651, __s1_651, 3, 2, 1, 0); \ - int16x8_t __rev2_651; __rev2_651 = __builtin_shufflevector(__s2_651, __s2_651, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_651 = __noswap_vqdmlsl_s16(__rev0_651, __rev1_651, __noswap_splat_laneq_s16(__rev2_651, __p3_651)); \ - __ret_651 = __builtin_shufflevector(__ret_651, __ret_651, 3, 2, 1, 0); \ - __ret_651; \ +#define vqdmlsl_laneq_s16(__p0_667, __p1_667, __p2_667, __p3_667) __extension__ ({ \ + int32x4_t __ret_667; \ + int32x4_t __s0_667 = __p0_667; \ + int16x4_t __s1_667 = __p1_667; \ + int16x8_t __s2_667 = __p2_667; \ + int32x4_t __rev0_667; __rev0_667 = __builtin_shufflevector(__s0_667, __s0_667, __lane_reverse_128_32); \ + int16x4_t __rev1_667; __rev1_667 = __builtin_shufflevector(__s1_667, __s1_667, __lane_reverse_64_16); \ + int16x8_t __rev2_667; __rev2_667 = __builtin_shufflevector(__s2_667, __s2_667, __lane_reverse_128_16); \ + __ret_667 = __noswap_vqdmlsl_s16(__rev0_667, __rev1_667, __noswap_splat_laneq_s16(__rev2_667, __p3_667)); \ + __ret_667 = __builtin_shufflevector(__ret_667, __ret_667, __lane_reverse_128_32); \ + __ret_667; \ }) #endif __ai __attribute__((target("neon"))) int32_t vqdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmulhs_s32(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmulhs_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqdmulhh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqdmulhh_s16(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -56871,7 +58529,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -56879,10 +58537,10 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x4_t 
__ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -56892,7 +58550,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -56900,10 +58558,10 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vqdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -56913,7 +58571,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqdmulh_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -56921,10 +58579,10 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqdmulh_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ 
__ret; \ }) #endif @@ -56934,7 +58592,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqdmulh_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -56942,87 +58600,87 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vqdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqdmulh_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_lane_s32(__p0_652, __p1_652, __p2_652) __extension__ ({ \ - int32_t __ret_652; \ - int32_t __s0_652 = __p0_652; \ - int32x2_t __s1_652 = __p1_652; \ - __ret_652 = vqdmulhs_s32(__s0_652, vget_lane_s32(__s1_652, __p2_652)); \ - __ret_652; \ +#define vqdmulhs_lane_s32(__p0_668, __p1_668, __p2_668) __extension__ ({ \ + int32_t __ret_668; \ + int32_t __s0_668 = __p0_668; \ + int32x2_t __s1_668 = __p1_668; \ + __ret_668 = vqdmulhs_s32(__s0_668, vget_lane_s32(__s1_668, __p2_668)); \ + __ret_668; \ }) #else -#define vqdmulhs_lane_s32(__p0_653, __p1_653, __p2_653) __extension__ ({ \ - int32_t __ret_653; \ - int32_t __s0_653 = __p0_653; \ - int32x2_t __s1_653 = __p1_653; \ - int32x2_t __rev1_653; __rev1_653 = __builtin_shufflevector(__s1_653, __s1_653, 1, 0); \ - __ret_653 = vqdmulhs_s32(__s0_653, __noswap_vget_lane_s32(__rev1_653, __p2_653)); \ - __ret_653; \ +#define vqdmulhs_lane_s32(__p0_669, __p1_669, __p2_669) __extension__ ({ \ + int32_t __ret_669; \ + int32_t __s0_669 = __p0_669; \ + int32x2_t __s1_669 = __p1_669; \ + int32x2_t __rev1_669; __rev1_669 = __builtin_shufflevector(__s1_669, __s1_669, __lane_reverse_64_32); \ + __ret_669 = vqdmulhs_s32(__s0_669, __noswap_vget_lane_s32(__rev1_669, __p2_669)); \ + __ret_669; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_lane_s16(__p0_654, __p1_654, __p2_654) __extension__ ({ \ - int16_t __ret_654; \ - int16_t __s0_654 = __p0_654; \ - int16x4_t __s1_654 = __p1_654; \ - __ret_654 = vqdmulhh_s16(__s0_654, vget_lane_s16(__s1_654, __p2_654)); \ - __ret_654; \ +#define vqdmulhh_lane_s16(__p0_670, __p1_670, __p2_670) __extension__ ({ \ + int16_t __ret_670; \ + int16_t __s0_670 = __p0_670; \ + int16x4_t __s1_670 = __p1_670; \ + __ret_670 = vqdmulhh_s16(__s0_670, vget_lane_s16(__s1_670, __p2_670)); \ + __ret_670; \ }) #else -#define vqdmulhh_lane_s16(__p0_655, __p1_655, __p2_655) __extension__ ({ \ - int16_t __ret_655; \ - int16_t __s0_655 = __p0_655; \ - int16x4_t __s1_655 = __p1_655; \ - int16x4_t __rev1_655; __rev1_655 = __builtin_shufflevector(__s1_655, __s1_655, 3, 2, 1, 0); \ - __ret_655 = vqdmulhh_s16(__s0_655, 
__noswap_vget_lane_s16(__rev1_655, __p2_655)); \ - __ret_655; \ +#define vqdmulhh_lane_s16(__p0_671, __p1_671, __p2_671) __extension__ ({ \ + int16_t __ret_671; \ + int16_t __s0_671 = __p0_671; \ + int16x4_t __s1_671 = __p1_671; \ + int16x4_t __rev1_671; __rev1_671 = __builtin_shufflevector(__s1_671, __s1_671, __lane_reverse_64_16); \ + __ret_671 = vqdmulhh_s16(__s0_671, __noswap_vget_lane_s16(__rev1_671, __p2_671)); \ + __ret_671; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_laneq_s32(__p0_656, __p1_656, __p2_656) __extension__ ({ \ - int32_t __ret_656; \ - int32_t __s0_656 = __p0_656; \ - int32x4_t __s1_656 = __p1_656; \ - __ret_656 = vqdmulhs_s32(__s0_656, vgetq_lane_s32(__s1_656, __p2_656)); \ - __ret_656; \ +#define vqdmulhs_laneq_s32(__p0_672, __p1_672, __p2_672) __extension__ ({ \ + int32_t __ret_672; \ + int32_t __s0_672 = __p0_672; \ + int32x4_t __s1_672 = __p1_672; \ + __ret_672 = vqdmulhs_s32(__s0_672, vgetq_lane_s32(__s1_672, __p2_672)); \ + __ret_672; \ }) #else -#define vqdmulhs_laneq_s32(__p0_657, __p1_657, __p2_657) __extension__ ({ \ - int32_t __ret_657; \ - int32_t __s0_657 = __p0_657; \ - int32x4_t __s1_657 = __p1_657; \ - int32x4_t __rev1_657; __rev1_657 = __builtin_shufflevector(__s1_657, __s1_657, 3, 2, 1, 0); \ - __ret_657 = vqdmulhs_s32(__s0_657, __noswap_vgetq_lane_s32(__rev1_657, __p2_657)); \ - __ret_657; \ +#define vqdmulhs_laneq_s32(__p0_673, __p1_673, __p2_673) __extension__ ({ \ + int32_t __ret_673; \ + int32_t __s0_673 = __p0_673; \ + int32x4_t __s1_673 = __p1_673; \ + int32x4_t __rev1_673; __rev1_673 = __builtin_shufflevector(__s1_673, __s1_673, __lane_reverse_128_32); \ + __ret_673 = vqdmulhs_s32(__s0_673, __noswap_vgetq_lane_s32(__rev1_673, __p2_673)); \ + __ret_673; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_laneq_s16(__p0_658, __p1_658, __p2_658) __extension__ ({ \ - int16_t __ret_658; \ - int16_t __s0_658 = __p0_658; \ - int16x8_t __s1_658 = __p1_658; \ - __ret_658 = vqdmulhh_s16(__s0_658, vgetq_lane_s16(__s1_658, __p2_658)); \ - __ret_658; \ +#define vqdmulhh_laneq_s16(__p0_674, __p1_674, __p2_674) __extension__ ({ \ + int16_t __ret_674; \ + int16_t __s0_674 = __p0_674; \ + int16x8_t __s1_674 = __p1_674; \ + __ret_674 = vqdmulhh_s16(__s0_674, vgetq_lane_s16(__s1_674, __p2_674)); \ + __ret_674; \ }) #else -#define vqdmulhh_laneq_s16(__p0_659, __p1_659, __p2_659) __extension__ ({ \ - int16_t __ret_659; \ - int16_t __s0_659 = __p0_659; \ - int16x8_t __s1_659 = __p1_659; \ - int16x8_t __rev1_659; __rev1_659 = __builtin_shufflevector(__s1_659, __s1_659, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_659 = vqdmulhh_s16(__s0_659, __noswap_vgetq_lane_s16(__rev1_659, __p2_659)); \ - __ret_659; \ +#define vqdmulhh_laneq_s16(__p0_675, __p1_675, __p2_675) __extension__ ({ \ + int16_t __ret_675; \ + int16_t __s0_675 = __p0_675; \ + int16x8_t __s1_675 = __p1_675; \ + int16x8_t __rev1_675; __rev1_675 = __builtin_shufflevector(__s1_675, __s1_675, __lane_reverse_128_16); \ + __ret_675 = vqdmulhh_s16(__s0_675, __noswap_vgetq_lane_s16(__rev1_675, __p2_675)); \ + __ret_675; \ }) #endif @@ -57031,7 +58689,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else @@ -57039,10 +58697,10 @@ __ai 
__attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -57052,7 +58710,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -57060,10 +58718,10 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vqdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -57073,7 +58731,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -57081,10 +58739,10 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x2_t, 
__builtin_neon_vqdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -57094,7 +58752,7 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -57102,22 +58760,22 @@ __ai __attribute__((target("neon"))) int16_t vqdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vqdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif __ai __attribute__((target("neon"))) int64_t vqdmulls_s32(int32_t __p0, int32_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqdmulls_s32(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqdmulls_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqdmullh_s16(int16_t __p0, int16_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqdmullh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqdmullh_s16(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -57129,10 +58787,10 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmull_high_s32(int32x4_t __p0, #else __ai __attribute__((target("neon"))) int64x2_t vqdmull_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -57146,95 +58804,95 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmull_high_s16(int16x8_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = 
__noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_lane_s32(__p0_660, __p1_660, __p2_660) __extension__ ({ \ - int64x2_t __ret_660; \ - int32x4_t __s0_660 = __p0_660; \ - int32x2_t __s1_660 = __p1_660; \ - __ret_660 = vqdmull_s32(vget_high_s32(__s0_660), splat_lane_s32(__s1_660, __p2_660)); \ - __ret_660; \ +#define vqdmull_high_lane_s32(__p0_676, __p1_676, __p2_676) __extension__ ({ \ + int64x2_t __ret_676; \ + int32x4_t __s0_676 = __p0_676; \ + int32x2_t __s1_676 = __p1_676; \ + __ret_676 = vqdmull_s32(vget_high_s32(__s0_676), splat_lane_s32(__s1_676, __p2_676)); \ + __ret_676; \ }) #else -#define vqdmull_high_lane_s32(__p0_661, __p1_661, __p2_661) __extension__ ({ \ - int64x2_t __ret_661; \ - int32x4_t __s0_661 = __p0_661; \ - int32x2_t __s1_661 = __p1_661; \ - int32x4_t __rev0_661; __rev0_661 = __builtin_shufflevector(__s0_661, __s0_661, 3, 2, 1, 0); \ - int32x2_t __rev1_661; __rev1_661 = __builtin_shufflevector(__s1_661, __s1_661, 1, 0); \ - __ret_661 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_661), __noswap_splat_lane_s32(__rev1_661, __p2_661)); \ - __ret_661 = __builtin_shufflevector(__ret_661, __ret_661, 1, 0); \ - __ret_661; \ +#define vqdmull_high_lane_s32(__p0_677, __p1_677, __p2_677) __extension__ ({ \ + int64x2_t __ret_677; \ + int32x4_t __s0_677 = __p0_677; \ + int32x2_t __s1_677 = __p1_677; \ + int32x4_t __rev0_677; __rev0_677 = __builtin_shufflevector(__s0_677, __s0_677, __lane_reverse_128_32); \ + int32x2_t __rev1_677; __rev1_677 = __builtin_shufflevector(__s1_677, __s1_677, __lane_reverse_64_32); \ + __ret_677 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_677), __noswap_splat_lane_s32(__rev1_677, __p2_677)); \ + __ret_677 = __builtin_shufflevector(__ret_677, __ret_677, __lane_reverse_128_64); \ + __ret_677; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_lane_s16(__p0_662, __p1_662, __p2_662) __extension__ ({ \ - int32x4_t __ret_662; \ - int16x8_t __s0_662 = __p0_662; \ - int16x4_t __s1_662 = __p1_662; \ - __ret_662 = vqdmull_s16(vget_high_s16(__s0_662), splat_lane_s16(__s1_662, __p2_662)); \ - __ret_662; \ +#define vqdmull_high_lane_s16(__p0_678, __p1_678, __p2_678) __extension__ ({ \ + int32x4_t __ret_678; \ + int16x8_t __s0_678 = __p0_678; \ + int16x4_t __s1_678 = __p1_678; \ + __ret_678 = vqdmull_s16(vget_high_s16(__s0_678), splat_lane_s16(__s1_678, __p2_678)); \ + __ret_678; \ }) #else -#define vqdmull_high_lane_s16(__p0_663, __p1_663, __p2_663) __extension__ ({ \ - int32x4_t __ret_663; \ - int16x8_t __s0_663 = __p0_663; \ - int16x4_t __s1_663 = __p1_663; \ - int16x8_t __rev0_663; __rev0_663 = __builtin_shufflevector(__s0_663, __s0_663, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1_663; __rev1_663 = __builtin_shufflevector(__s1_663, __s1_663, 3, 2, 1, 0); \ - __ret_663 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_663), __noswap_splat_lane_s16(__rev1_663, __p2_663)); \ - __ret_663 = __builtin_shufflevector(__ret_663, __ret_663, 3, 2, 1, 0); \ - __ret_663; \ +#define vqdmull_high_lane_s16(__p0_679, __p1_679, __p2_679) __extension__ ({ \ + int32x4_t __ret_679; \ + int16x8_t __s0_679 = __p0_679; \ + int16x4_t __s1_679 = __p1_679; \ + int16x8_t __rev0_679; __rev0_679 = __builtin_shufflevector(__s0_679, __s0_679, __lane_reverse_128_16); \ + int16x4_t __rev1_679; __rev1_679 = 
__builtin_shufflevector(__s1_679, __s1_679, __lane_reverse_64_16); \ + __ret_679 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_679), __noswap_splat_lane_s16(__rev1_679, __p2_679)); \ + __ret_679 = __builtin_shufflevector(__ret_679, __ret_679, __lane_reverse_128_32); \ + __ret_679; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_laneq_s32(__p0_664, __p1_664, __p2_664) __extension__ ({ \ - int64x2_t __ret_664; \ - int32x4_t __s0_664 = __p0_664; \ - int32x4_t __s1_664 = __p1_664; \ - __ret_664 = vqdmull_s32(vget_high_s32(__s0_664), splat_laneq_s32(__s1_664, __p2_664)); \ - __ret_664; \ +#define vqdmull_high_laneq_s32(__p0_680, __p1_680, __p2_680) __extension__ ({ \ + int64x2_t __ret_680; \ + int32x4_t __s0_680 = __p0_680; \ + int32x4_t __s1_680 = __p1_680; \ + __ret_680 = vqdmull_s32(vget_high_s32(__s0_680), splat_laneq_s32(__s1_680, __p2_680)); \ + __ret_680; \ }) #else -#define vqdmull_high_laneq_s32(__p0_665, __p1_665, __p2_665) __extension__ ({ \ - int64x2_t __ret_665; \ - int32x4_t __s0_665 = __p0_665; \ - int32x4_t __s1_665 = __p1_665; \ - int32x4_t __rev0_665; __rev0_665 = __builtin_shufflevector(__s0_665, __s0_665, 3, 2, 1, 0); \ - int32x4_t __rev1_665; __rev1_665 = __builtin_shufflevector(__s1_665, __s1_665, 3, 2, 1, 0); \ - __ret_665 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_665), __noswap_splat_laneq_s32(__rev1_665, __p2_665)); \ - __ret_665 = __builtin_shufflevector(__ret_665, __ret_665, 1, 0); \ - __ret_665; \ +#define vqdmull_high_laneq_s32(__p0_681, __p1_681, __p2_681) __extension__ ({ \ + int64x2_t __ret_681; \ + int32x4_t __s0_681 = __p0_681; \ + int32x4_t __s1_681 = __p1_681; \ + int32x4_t __rev0_681; __rev0_681 = __builtin_shufflevector(__s0_681, __s0_681, __lane_reverse_128_32); \ + int32x4_t __rev1_681; __rev1_681 = __builtin_shufflevector(__s1_681, __s1_681, __lane_reverse_128_32); \ + __ret_681 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_681), __noswap_splat_laneq_s32(__rev1_681, __p2_681)); \ + __ret_681 = __builtin_shufflevector(__ret_681, __ret_681, __lane_reverse_128_64); \ + __ret_681; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_laneq_s16(__p0_666, __p1_666, __p2_666) __extension__ ({ \ - int32x4_t __ret_666; \ - int16x8_t __s0_666 = __p0_666; \ - int16x8_t __s1_666 = __p1_666; \ - __ret_666 = vqdmull_s16(vget_high_s16(__s0_666), splat_laneq_s16(__s1_666, __p2_666)); \ - __ret_666; \ +#define vqdmull_high_laneq_s16(__p0_682, __p1_682, __p2_682) __extension__ ({ \ + int32x4_t __ret_682; \ + int16x8_t __s0_682 = __p0_682; \ + int16x8_t __s1_682 = __p1_682; \ + __ret_682 = vqdmull_s16(vget_high_s16(__s0_682), splat_laneq_s16(__s1_682, __p2_682)); \ + __ret_682; \ }) #else -#define vqdmull_high_laneq_s16(__p0_667, __p1_667, __p2_667) __extension__ ({ \ - int32x4_t __ret_667; \ - int16x8_t __s0_667 = __p0_667; \ - int16x8_t __s1_667 = __p1_667; \ - int16x8_t __rev0_667; __rev0_667 = __builtin_shufflevector(__s0_667, __s0_667, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_667; __rev1_667 = __builtin_shufflevector(__s1_667, __s1_667, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_667 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_667), __noswap_splat_laneq_s16(__rev1_667, __p2_667)); \ - __ret_667 = __builtin_shufflevector(__ret_667, __ret_667, 3, 2, 1, 0); \ - __ret_667; \ +#define vqdmull_high_laneq_s16(__p0_683, __p1_683, __p2_683) __extension__ ({ \ + int32x4_t __ret_683; \ + int16x8_t __s0_683 = __p0_683; \ + int16x8_t __s1_683 = __p1_683; \ + int16x8_t __rev0_683; __rev0_683 = __builtin_shufflevector(__s0_683, __s0_683, 
__lane_reverse_128_16); \ + int16x8_t __rev1_683; __rev1_683 = __builtin_shufflevector(__s1_683, __s1_683, __lane_reverse_128_16); \ + __ret_683 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_683), __noswap_splat_laneq_s16(__rev1_683, __p2_683)); \ + __ret_683 = __builtin_shufflevector(__ret_683, __ret_683, __lane_reverse_128_32); \ + __ret_683; \ }) #endif @@ -57247,9 +58905,9 @@ __ai __attribute__((target("neon"))) int64x2_t vqdmull_high_n_s32(int32x4_t __p0 #else __ai __attribute__((target("neon"))) int64x2_t vqdmull_high_n_s32(int32x4_t __p0, int32_t __p1) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); __ret = __noswap_vqdmull_n_s32(__noswap_vget_high_s32(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -57263,159 +58921,159 @@ __ai __attribute__((target("neon"))) int32x4_t vqdmull_high_n_s16(int16x8_t __p0 #else __ai __attribute__((target("neon"))) int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __noswap_vqdmull_n_s16(__noswap_vget_high_s16(__rev0), __p1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulls_lane_s32(__p0_668, __p1_668, __p2_668) __extension__ ({ \ - int64_t __ret_668; \ - int32_t __s0_668 = __p0_668; \ - int32x2_t __s1_668 = __p1_668; \ - __ret_668 = vqdmulls_s32(__s0_668, vget_lane_s32(__s1_668, __p2_668)); \ - __ret_668; \ +#define vqdmulls_lane_s32(__p0_684, __p1_684, __p2_684) __extension__ ({ \ + int64_t __ret_684; \ + int32_t __s0_684 = __p0_684; \ + int32x2_t __s1_684 = __p1_684; \ + __ret_684 = vqdmulls_s32(__s0_684, vget_lane_s32(__s1_684, __p2_684)); \ + __ret_684; \ }) #else -#define vqdmulls_lane_s32(__p0_669, __p1_669, __p2_669) __extension__ ({ \ - int64_t __ret_669; \ - int32_t __s0_669 = __p0_669; \ - int32x2_t __s1_669 = __p1_669; \ - int32x2_t __rev1_669; __rev1_669 = __builtin_shufflevector(__s1_669, __s1_669, 1, 0); \ - __ret_669 = vqdmulls_s32(__s0_669, __noswap_vget_lane_s32(__rev1_669, __p2_669)); \ - __ret_669; \ +#define vqdmulls_lane_s32(__p0_685, __p1_685, __p2_685) __extension__ ({ \ + int64_t __ret_685; \ + int32_t __s0_685 = __p0_685; \ + int32x2_t __s1_685 = __p1_685; \ + int32x2_t __rev1_685; __rev1_685 = __builtin_shufflevector(__s1_685, __s1_685, __lane_reverse_64_32); \ + __ret_685 = vqdmulls_s32(__s0_685, __noswap_vget_lane_s32(__rev1_685, __p2_685)); \ + __ret_685; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmullh_lane_s16(__p0_670, __p1_670, __p2_670) __extension__ ({ \ - int32_t __ret_670; \ - int16_t __s0_670 = __p0_670; \ - int16x4_t __s1_670 = __p1_670; \ - __ret_670 = vqdmullh_s16(__s0_670, vget_lane_s16(__s1_670, __p2_670)); \ - __ret_670; \ +#define vqdmullh_lane_s16(__p0_686, __p1_686, __p2_686) __extension__ ({ \ + int32_t __ret_686; \ + int16_t __s0_686 = __p0_686; \ + int16x4_t __s1_686 = __p1_686; \ + __ret_686 = vqdmullh_s16(__s0_686, vget_lane_s16(__s1_686, __p2_686)); \ + __ret_686; \ }) #else -#define vqdmullh_lane_s16(__p0_671, __p1_671, __p2_671) __extension__ ({ \ - int32_t __ret_671; \ - 
int16_t __s0_671 = __p0_671; \ - int16x4_t __s1_671 = __p1_671; \ - int16x4_t __rev1_671; __rev1_671 = __builtin_shufflevector(__s1_671, __s1_671, 3, 2, 1, 0); \ - __ret_671 = vqdmullh_s16(__s0_671, __noswap_vget_lane_s16(__rev1_671, __p2_671)); \ - __ret_671; \ +#define vqdmullh_lane_s16(__p0_687, __p1_687, __p2_687) __extension__ ({ \ + int32_t __ret_687; \ + int16_t __s0_687 = __p0_687; \ + int16x4_t __s1_687 = __p1_687; \ + int16x4_t __rev1_687; __rev1_687 = __builtin_shufflevector(__s1_687, __s1_687, __lane_reverse_64_16); \ + __ret_687 = vqdmullh_s16(__s0_687, __noswap_vget_lane_s16(__rev1_687, __p2_687)); \ + __ret_687; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulls_laneq_s32(__p0_672, __p1_672, __p2_672) __extension__ ({ \ - int64_t __ret_672; \ - int32_t __s0_672 = __p0_672; \ - int32x4_t __s1_672 = __p1_672; \ - __ret_672 = vqdmulls_s32(__s0_672, vgetq_lane_s32(__s1_672, __p2_672)); \ - __ret_672; \ +#define vqdmulls_laneq_s32(__p0_688, __p1_688, __p2_688) __extension__ ({ \ + int64_t __ret_688; \ + int32_t __s0_688 = __p0_688; \ + int32x4_t __s1_688 = __p1_688; \ + __ret_688 = vqdmulls_s32(__s0_688, vgetq_lane_s32(__s1_688, __p2_688)); \ + __ret_688; \ }) #else -#define vqdmulls_laneq_s32(__p0_673, __p1_673, __p2_673) __extension__ ({ \ - int64_t __ret_673; \ - int32_t __s0_673 = __p0_673; \ - int32x4_t __s1_673 = __p1_673; \ - int32x4_t __rev1_673; __rev1_673 = __builtin_shufflevector(__s1_673, __s1_673, 3, 2, 1, 0); \ - __ret_673 = vqdmulls_s32(__s0_673, __noswap_vgetq_lane_s32(__rev1_673, __p2_673)); \ - __ret_673; \ +#define vqdmulls_laneq_s32(__p0_689, __p1_689, __p2_689) __extension__ ({ \ + int64_t __ret_689; \ + int32_t __s0_689 = __p0_689; \ + int32x4_t __s1_689 = __p1_689; \ + int32x4_t __rev1_689; __rev1_689 = __builtin_shufflevector(__s1_689, __s1_689, __lane_reverse_128_32); \ + __ret_689 = vqdmulls_s32(__s0_689, __noswap_vgetq_lane_s32(__rev1_689, __p2_689)); \ + __ret_689; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmullh_laneq_s16(__p0_674, __p1_674, __p2_674) __extension__ ({ \ - int32_t __ret_674; \ - int16_t __s0_674 = __p0_674; \ - int16x8_t __s1_674 = __p1_674; \ - __ret_674 = vqdmullh_s16(__s0_674, vgetq_lane_s16(__s1_674, __p2_674)); \ - __ret_674; \ +#define vqdmullh_laneq_s16(__p0_690, __p1_690, __p2_690) __extension__ ({ \ + int32_t __ret_690; \ + int16_t __s0_690 = __p0_690; \ + int16x8_t __s1_690 = __p1_690; \ + __ret_690 = vqdmullh_s16(__s0_690, vgetq_lane_s16(__s1_690, __p2_690)); \ + __ret_690; \ }) #else -#define vqdmullh_laneq_s16(__p0_675, __p1_675, __p2_675) __extension__ ({ \ - int32_t __ret_675; \ - int16_t __s0_675 = __p0_675; \ - int16x8_t __s1_675 = __p1_675; \ - int16x8_t __rev1_675; __rev1_675 = __builtin_shufflevector(__s1_675, __s1_675, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_675 = vqdmullh_s16(__s0_675, __noswap_vgetq_lane_s16(__rev1_675, __p2_675)); \ - __ret_675; \ +#define vqdmullh_laneq_s16(__p0_691, __p1_691, __p2_691) __extension__ ({ \ + int32_t __ret_691; \ + int16_t __s0_691 = __p0_691; \ + int16x8_t __s1_691 = __p1_691; \ + int16x8_t __rev1_691; __rev1_691 = __builtin_shufflevector(__s1_691, __s1_691, __lane_reverse_128_16); \ + __ret_691 = vqdmullh_s16(__s0_691, __noswap_vgetq_lane_s16(__rev1_691, __p2_691)); \ + __ret_691; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_laneq_s32(__p0_676, __p1_676, __p2_676) __extension__ ({ \ - int64x2_t __ret_676; \ - int32x2_t __s0_676 = __p0_676; \ - int32x4_t __s1_676 = __p1_676; \ - __ret_676 = vqdmull_s32(__s0_676, splat_laneq_s32(__s1_676, __p2_676)); \ - __ret_676; 
\ +#define vqdmull_laneq_s32(__p0_692, __p1_692, __p2_692) __extension__ ({ \ + int64x2_t __ret_692; \ + int32x2_t __s0_692 = __p0_692; \ + int32x4_t __s1_692 = __p1_692; \ + __ret_692 = vqdmull_s32(__s0_692, splat_laneq_s32(__s1_692, __p2_692)); \ + __ret_692; \ }) #else -#define vqdmull_laneq_s32(__p0_677, __p1_677, __p2_677) __extension__ ({ \ - int64x2_t __ret_677; \ - int32x2_t __s0_677 = __p0_677; \ - int32x4_t __s1_677 = __p1_677; \ - int32x2_t __rev0_677; __rev0_677 = __builtin_shufflevector(__s0_677, __s0_677, 1, 0); \ - int32x4_t __rev1_677; __rev1_677 = __builtin_shufflevector(__s1_677, __s1_677, 3, 2, 1, 0); \ - __ret_677 = __noswap_vqdmull_s32(__rev0_677, __noswap_splat_laneq_s32(__rev1_677, __p2_677)); \ - __ret_677 = __builtin_shufflevector(__ret_677, __ret_677, 1, 0); \ - __ret_677; \ +#define vqdmull_laneq_s32(__p0_693, __p1_693, __p2_693) __extension__ ({ \ + int64x2_t __ret_693; \ + int32x2_t __s0_693 = __p0_693; \ + int32x4_t __s1_693 = __p1_693; \ + int32x2_t __rev0_693; __rev0_693 = __builtin_shufflevector(__s0_693, __s0_693, __lane_reverse_64_32); \ + int32x4_t __rev1_693; __rev1_693 = __builtin_shufflevector(__s1_693, __s1_693, __lane_reverse_128_32); \ + __ret_693 = __noswap_vqdmull_s32(__rev0_693, __noswap_splat_laneq_s32(__rev1_693, __p2_693)); \ + __ret_693 = __builtin_shufflevector(__ret_693, __ret_693, __lane_reverse_128_64); \ + __ret_693; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_laneq_s16(__p0_678, __p1_678, __p2_678) __extension__ ({ \ - int32x4_t __ret_678; \ - int16x4_t __s0_678 = __p0_678; \ - int16x8_t __s1_678 = __p1_678; \ - __ret_678 = vqdmull_s16(__s0_678, splat_laneq_s16(__s1_678, __p2_678)); \ - __ret_678; \ +#define vqdmull_laneq_s16(__p0_694, __p1_694, __p2_694) __extension__ ({ \ + int32x4_t __ret_694; \ + int16x4_t __s0_694 = __p0_694; \ + int16x8_t __s1_694 = __p1_694; \ + __ret_694 = vqdmull_s16(__s0_694, splat_laneq_s16(__s1_694, __p2_694)); \ + __ret_694; \ }) #else -#define vqdmull_laneq_s16(__p0_679, __p1_679, __p2_679) __extension__ ({ \ - int32x4_t __ret_679; \ - int16x4_t __s0_679 = __p0_679; \ - int16x8_t __s1_679 = __p1_679; \ - int16x4_t __rev0_679; __rev0_679 = __builtin_shufflevector(__s0_679, __s0_679, 3, 2, 1, 0); \ - int16x8_t __rev1_679; __rev1_679 = __builtin_shufflevector(__s1_679, __s1_679, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_679 = __noswap_vqdmull_s16(__rev0_679, __noswap_splat_laneq_s16(__rev1_679, __p2_679)); \ - __ret_679 = __builtin_shufflevector(__ret_679, __ret_679, 3, 2, 1, 0); \ - __ret_679; \ +#define vqdmull_laneq_s16(__p0_695, __p1_695, __p2_695) __extension__ ({ \ + int32x4_t __ret_695; \ + int16x4_t __s0_695 = __p0_695; \ + int16x8_t __s1_695 = __p1_695; \ + int16x4_t __rev0_695; __rev0_695 = __builtin_shufflevector(__s0_695, __s0_695, __lane_reverse_64_16); \ + int16x8_t __rev1_695; __rev1_695 = __builtin_shufflevector(__s1_695, __s1_695, __lane_reverse_128_16); \ + __ret_695 = __noswap_vqdmull_s16(__rev0_695, __noswap_splat_laneq_s16(__rev1_695, __p2_695)); \ + __ret_695 = __builtin_shufflevector(__ret_695, __ret_695, __lane_reverse_128_32); \ + __ret_695; \ }) #endif __ai __attribute__((target("neon"))) int16_t vqmovns_s32(int32_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqmovns_s32(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqmovns_s32(__p0)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqmovnd_s64(int64_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovnd_s64(__p0); + __ret = __builtin_bit_cast(int32_t, 
__builtin_neon_vqmovnd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) int8_t vqmovnh_s16(int16_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovnh_s16(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqmovnh_s16(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint16_t vqmovns_u32(uint32_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqmovns_u32(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqmovns_u32(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vqmovnd_u64(uint64_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqmovnd_u64(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqmovnd_u64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint8_t vqmovnh_u16(uint16_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqmovnh_u16(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqmovnh_u16(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -57427,10 +59085,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vqmovn_high_u32(uint16x4_t __p0, #else __ai __attribute__((target("neon"))) uint16x8_t vqmovn_high_u32(uint16x4_t __p0, uint32x4_t __p1) { uint16x8_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vcombine_u16(__rev0, __noswap_vqmovn_u32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -57444,10 +59102,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vqmovn_high_u64(uint32x2_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vqmovn_high_u64(uint32x2_t __p0, uint64x2_t __p1) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vcombine_u32(__rev0, __noswap_vqmovn_u64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -57461,10 +59119,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vqmovn_high_u16(uint8x8_t __p0, #else __ai __attribute__((target("neon"))) uint8x16_t vqmovn_high_u16(uint8x8_t __p0, uint16x8_t __p1) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vcombine_u8(__rev0, __noswap_vqmovn_u16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -57478,10 +59136,10 @@ __ai __attribute__((target("neon"))) int16x8_t vqmovn_high_s32(int16x4_t __p0, i #else 
__ai __attribute__((target("neon"))) int16x8_t vqmovn_high_s32(int16x4_t __p0, int32x4_t __p1) { int16x8_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vcombine_s16(__rev0, __noswap_vqmovn_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -57495,10 +59153,10 @@ __ai __attribute__((target("neon"))) int32x4_t vqmovn_high_s64(int32x2_t __p0, i #else __ai __attribute__((target("neon"))) int32x4_t vqmovn_high_s64(int32x2_t __p0, int64x2_t __p1) { int32x4_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __noswap_vcombine_s32(__rev0, __noswap_vqmovn_s64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -57512,42 +59170,42 @@ __ai __attribute__((target("neon"))) int8x16_t vqmovn_high_s16(int8x8_t __p0, in #else __ai __attribute__((target("neon"))) int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { int8x16_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vcombine_s8(__rev0, __noswap_vqmovn_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif __ai __attribute__((target("neon"))) uint16_t vqmovuns_s32(int32_t __p0) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqmovuns_s32(__p0); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqmovuns_s32(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vqmovund_s64(int64_t __p0) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqmovund_s64(__p0); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqmovund_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) uint8_t vqmovunh_s16(int16_t __p0) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqmovunh_s16(__p0); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqmovunh_s16(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vqmovun_high_s32(uint16x4_t __p0, int32x4_t __p1) { uint16x8_t __ret; - __ret = vcombine_u16((uint16x4_t)(__p0), vqmovun_s32(__p1)); + __ret = vcombine_u16(__builtin_bit_cast(uint16x4_t, __p0), vqmovun_s32(__p1)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vqmovun_high_s32(uint16x4_t __p0, int32x4_t __p1) { uint16x8_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = __noswap_vcombine_u16((uint16x4_t)(__rev0), __noswap_vqmovun_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0), __noswap_vqmovun_s32(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -57555,16 +59213,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vqmovun_high_s32(uint16x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vqmovun_high_s64(uint32x2_t __p0, int64x2_t __p1) { uint32x4_t __ret; - __ret = vcombine_u32((uint32x2_t)(__p0), vqmovun_s64(__p1)); + __ret = vcombine_u32(__builtin_bit_cast(uint32x2_t, __p0), vqmovun_s64(__p1)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vqmovun_high_s64(uint32x2_t __p0, int64x2_t __p1) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = __noswap_vcombine_u32((uint32x2_t)(__rev0), __noswap_vqmovun_s64(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0), __noswap_vqmovun_s64(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -57572,16 +59230,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vqmovun_high_s64(uint32x2_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqmovun_high_s16(uint8x8_t __p0, int16x8_t __p1) { uint8x16_t __ret; - __ret = vcombine_u8((uint8x8_t)(__p0), vqmovun_s16(__p1)); + __ret = vcombine_u8(__builtin_bit_cast(uint8x8_t, __p0), vqmovun_s16(__p1)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqmovun_high_s16(uint8x8_t __p0, int16x8_t __p1) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = __noswap_vcombine_u8((uint8x8_t)(__rev0), __noswap_vqmovun_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0), __noswap_vqmovun_s16(__rev1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -57589,52 +59247,52 @@ __ai __attribute__((target("neon"))) uint8x16_t vqmovun_high_s16(uint8x8_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vqnegq_s64(int64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __p0), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vqnegq_s64(int64x2_t __p0) { 
int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (int64x2_t) __builtin_neon_vqnegq_v((int8x16_t)__rev0, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vqnegq_v(__builtin_bit_cast(int8x16_t, __rev0), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vqneg_s64(int64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vqneg_v(__builtin_bit_cast(int8x8_t, __p0), 3)); return __ret; } __ai __attribute__((target("neon"))) int8_t vqnegb_s8(int8_t __p0) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqnegb_s8(__p0); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqnegb_s8(__p0)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqnegs_s32(int32_t __p0) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqnegs_s32(__p0); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqnegs_s32(__p0)); return __ret; } __ai __attribute__((target("neon"))) int64_t vqnegd_s64(int64_t __p0) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqnegd_s64(__p0); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqnegd_s64(__p0)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqnegh_s16(int16_t __p0) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqnegh_s16(__p0); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqnegh_s16(__p0)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqrdmulhs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmulhs_s32(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqrdmulhs_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmulhh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqrdmulhh_s16(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -57642,7 +59300,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -57650,10 +59308,10 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -57663,7 +59321,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -57671,10 +59329,10 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_lane_v((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmulhq_lane_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -57684,7 +59342,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmulh_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -57692,10 +59350,10 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x2_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmulh_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -57705,7 +59363,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmulh_lane_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -57713,87 +59371,87 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x4_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ 
- int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vqrdmulh_lane_v((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmulh_lane_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_lane_s32(__p0_680, __p1_680, __p2_680) __extension__ ({ \ - int32_t __ret_680; \ - int32_t __s0_680 = __p0_680; \ - int32x2_t __s1_680 = __p1_680; \ - __ret_680 = vqrdmulhs_s32(__s0_680, vget_lane_s32(__s1_680, __p2_680)); \ - __ret_680; \ +#define vqrdmulhs_lane_s32(__p0_696, __p1_696, __p2_696) __extension__ ({ \ + int32_t __ret_696; \ + int32_t __s0_696 = __p0_696; \ + int32x2_t __s1_696 = __p1_696; \ + __ret_696 = vqrdmulhs_s32(__s0_696, vget_lane_s32(__s1_696, __p2_696)); \ + __ret_696; \ }) #else -#define vqrdmulhs_lane_s32(__p0_681, __p1_681, __p2_681) __extension__ ({ \ - int32_t __ret_681; \ - int32_t __s0_681 = __p0_681; \ - int32x2_t __s1_681 = __p1_681; \ - int32x2_t __rev1_681; __rev1_681 = __builtin_shufflevector(__s1_681, __s1_681, 1, 0); \ - __ret_681 = vqrdmulhs_s32(__s0_681, __noswap_vget_lane_s32(__rev1_681, __p2_681)); \ - __ret_681; \ +#define vqrdmulhs_lane_s32(__p0_697, __p1_697, __p2_697) __extension__ ({ \ + int32_t __ret_697; \ + int32_t __s0_697 = __p0_697; \ + int32x2_t __s1_697 = __p1_697; \ + int32x2_t __rev1_697; __rev1_697 = __builtin_shufflevector(__s1_697, __s1_697, __lane_reverse_64_32); \ + __ret_697 = vqrdmulhs_s32(__s0_697, __noswap_vget_lane_s32(__rev1_697, __p2_697)); \ + __ret_697; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_lane_s16(__p0_682, __p1_682, __p2_682) __extension__ ({ \ - int16_t __ret_682; \ - int16_t __s0_682 = __p0_682; \ - int16x4_t __s1_682 = __p1_682; \ - __ret_682 = vqrdmulhh_s16(__s0_682, vget_lane_s16(__s1_682, __p2_682)); \ - __ret_682; \ +#define vqrdmulhh_lane_s16(__p0_698, __p1_698, __p2_698) __extension__ ({ \ + int16_t __ret_698; \ + int16_t __s0_698 = __p0_698; \ + int16x4_t __s1_698 = __p1_698; \ + __ret_698 = vqrdmulhh_s16(__s0_698, vget_lane_s16(__s1_698, __p2_698)); \ + __ret_698; \ }) #else -#define vqrdmulhh_lane_s16(__p0_683, __p1_683, __p2_683) __extension__ ({ \ - int16_t __ret_683; \ - int16_t __s0_683 = __p0_683; \ - int16x4_t __s1_683 = __p1_683; \ - int16x4_t __rev1_683; __rev1_683 = __builtin_shufflevector(__s1_683, __s1_683, 3, 2, 1, 0); \ - __ret_683 = vqrdmulhh_s16(__s0_683, __noswap_vget_lane_s16(__rev1_683, __p2_683)); \ - __ret_683; \ +#define vqrdmulhh_lane_s16(__p0_699, __p1_699, __p2_699) __extension__ ({ \ + int16_t __ret_699; \ + int16_t __s0_699 = __p0_699; \ + int16x4_t __s1_699 = __p1_699; \ + int16x4_t __rev1_699; __rev1_699 = __builtin_shufflevector(__s1_699, __s1_699, __lane_reverse_64_16); \ + __ret_699 = vqrdmulhh_s16(__s0_699, __noswap_vget_lane_s16(__rev1_699, __p2_699)); \ + __ret_699; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_laneq_s32(__p0_684, __p1_684, __p2_684) __extension__ ({ \ - int32_t __ret_684; \ - int32_t __s0_684 = __p0_684; \ - int32x4_t __s1_684 = __p1_684; \ - __ret_684 = vqrdmulhs_s32(__s0_684, vgetq_lane_s32(__s1_684, __p2_684)); \ - 
__ret_684; \ +#define vqrdmulhs_laneq_s32(__p0_700, __p1_700, __p2_700) __extension__ ({ \ + int32_t __ret_700; \ + int32_t __s0_700 = __p0_700; \ + int32x4_t __s1_700 = __p1_700; \ + __ret_700 = vqrdmulhs_s32(__s0_700, vgetq_lane_s32(__s1_700, __p2_700)); \ + __ret_700; \ }) #else -#define vqrdmulhs_laneq_s32(__p0_685, __p1_685, __p2_685) __extension__ ({ \ - int32_t __ret_685; \ - int32_t __s0_685 = __p0_685; \ - int32x4_t __s1_685 = __p1_685; \ - int32x4_t __rev1_685; __rev1_685 = __builtin_shufflevector(__s1_685, __s1_685, 3, 2, 1, 0); \ - __ret_685 = vqrdmulhs_s32(__s0_685, __noswap_vgetq_lane_s32(__rev1_685, __p2_685)); \ - __ret_685; \ +#define vqrdmulhs_laneq_s32(__p0_701, __p1_701, __p2_701) __extension__ ({ \ + int32_t __ret_701; \ + int32_t __s0_701 = __p0_701; \ + int32x4_t __s1_701 = __p1_701; \ + int32x4_t __rev1_701; __rev1_701 = __builtin_shufflevector(__s1_701, __s1_701, __lane_reverse_128_32); \ + __ret_701 = vqrdmulhs_s32(__s0_701, __noswap_vgetq_lane_s32(__rev1_701, __p2_701)); \ + __ret_701; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_laneq_s16(__p0_686, __p1_686, __p2_686) __extension__ ({ \ - int16_t __ret_686; \ - int16_t __s0_686 = __p0_686; \ - int16x8_t __s1_686 = __p1_686; \ - __ret_686 = vqrdmulhh_s16(__s0_686, vgetq_lane_s16(__s1_686, __p2_686)); \ - __ret_686; \ +#define vqrdmulhh_laneq_s16(__p0_702, __p1_702, __p2_702) __extension__ ({ \ + int16_t __ret_702; \ + int16_t __s0_702 = __p0_702; \ + int16x8_t __s1_702 = __p1_702; \ + __ret_702 = vqrdmulhh_s16(__s0_702, vgetq_lane_s16(__s1_702, __p2_702)); \ + __ret_702; \ }) #else -#define vqrdmulhh_laneq_s16(__p0_687, __p1_687, __p2_687) __extension__ ({ \ - int16_t __ret_687; \ - int16_t __s0_687 = __p0_687; \ - int16x8_t __s1_687 = __p1_687; \ - int16x8_t __rev1_687; __rev1_687 = __builtin_shufflevector(__s1_687, __s1_687, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_687 = vqrdmulhh_s16(__s0_687, __noswap_vgetq_lane_s16(__rev1_687, __p2_687)); \ - __ret_687; \ +#define vqrdmulhh_laneq_s16(__p0_703, __p1_703, __p2_703) __extension__ ({ \ + int16_t __ret_703; \ + int16_t __s0_703 = __p0_703; \ + int16x8_t __s1_703 = __p1_703; \ + int16x8_t __rev1_703; __rev1_703 = __builtin_shufflevector(__s1_703, __s1_703, __lane_reverse_128_16); \ + __ret_703 = vqrdmulhh_s16(__s0_703, __noswap_vgetq_lane_s16(__rev1_703, __p2_703)); \ + __ret_703; \ }) #endif @@ -57802,7 +59460,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 34); \ + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqrdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 34)); \ __ret; \ }) #else @@ -57810,10 +59468,10 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x4_t __ret; \ int32x4_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x4_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 34); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x4_t, 
__builtin_neon_vqrdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 34)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -57823,7 +59481,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 33)); \ __ret; \ }) #else @@ -57831,10 +59489,10 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int16x8_t __ret; \ int16x8_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x8_t) __builtin_neon_vqrdmulhq_laneq_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vqrdmulhq_laneq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 33)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); \ __ret; \ }) #endif @@ -57844,7 +59502,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - __ret = (int32x2_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 2); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 2)); \ __ret; \ }) #else @@ -57852,10 +59510,10 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int32x2_t __ret; \ int32x2_t __s0 = __p0; \ int32x4_t __s1 = __p1; \ - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - __ret = (int32x2_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_32); \ + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vqrdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); \ __ret; \ }) #endif @@ -57865,7 +59523,7 @@ __ai __attribute__((target("neon"))) int16_t vqrdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - __ret = (int16x4_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__s0, (int8x16_t)__s1, __p2, 1); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 1)); \ __ret; \ }) #else @@ -57873,705 +59531,705 @@ __ai __attribute__((target("neon"))) int16_t 
vqrdmulhh_s16(int16_t __p0, int16_t int16x4_t __ret; \ int16x4_t __s0 = __p0; \ int16x8_t __s1 = __p1; \ - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret = (int16x4_t) __builtin_neon_vqrdmulh_laneq_v((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 1); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_64_16); \ + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_16); \ + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vqrdmulh_laneq_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 1)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); \ __ret; \ }) #endif __ai __attribute__((target("neon"))) uint8_t vqrshlb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqrshlb_u8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vqrshls_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqrshls_u32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vqrshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vqrshld_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint16_t vqrshlh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqrshlh_u16(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int8_t vqrshlb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqrshlb_s8(__p0, __p1); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqrshlb_s8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqrshls_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrshls_s32(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqrshls_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vqrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqrshld_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqrshld_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrshlh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqrshlh_s16(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u32(__p0_688, __p1_688, __p2_688) __extension__ ({ \ - uint16x8_t __ret_688; \ - uint16x4_t __s0_688 = __p0_688; \ - uint32x4_t __s1_688 = __p1_688; \ - __ret_688 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_688), (uint16x4_t)(vqrshrn_n_u32(__s1_688, __p2_688)))); \ - __ret_688; \ +#define vqrshrn_high_n_u32(__p0_704, __p1_704, __p2_704) __extension__ ({ \ + uint16x8_t __ret_704; \ + uint16x4_t __s0_704 = __p0_704; \ + uint32x4_t __s1_704 = __p1_704; \ + __ret_704 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_704), 
__builtin_bit_cast(uint16x4_t, vqrshrn_n_u32(__s1_704, __p2_704)))); \ + __ret_704; \ }) #else -#define vqrshrn_high_n_u32(__p0_689, __p1_689, __p2_689) __extension__ ({ \ - uint16x8_t __ret_689; \ - uint16x4_t __s0_689 = __p0_689; \ - uint32x4_t __s1_689 = __p1_689; \ - uint16x4_t __rev0_689; __rev0_689 = __builtin_shufflevector(__s0_689, __s0_689, 3, 2, 1, 0); \ - uint32x4_t __rev1_689; __rev1_689 = __builtin_shufflevector(__s1_689, __s1_689, 3, 2, 1, 0); \ - __ret_689 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_689), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_689, __p2_689)))); \ - __ret_689 = __builtin_shufflevector(__ret_689, __ret_689, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_689; \ +#define vqrshrn_high_n_u32(__p0_705, __p1_705, __p2_705) __extension__ ({ \ + uint16x8_t __ret_705; \ + uint16x4_t __s0_705 = __p0_705; \ + uint32x4_t __s1_705 = __p1_705; \ + uint16x4_t __rev0_705; __rev0_705 = __builtin_shufflevector(__s0_705, __s0_705, __lane_reverse_64_16); \ + uint32x4_t __rev1_705; __rev1_705 = __builtin_shufflevector(__s1_705, __s1_705, __lane_reverse_128_32); \ + __ret_705 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_705), __builtin_bit_cast(uint16x4_t, __noswap_vqrshrn_n_u32(__rev1_705, __p2_705)))); \ + __ret_705 = __builtin_shufflevector(__ret_705, __ret_705, __lane_reverse_128_16); \ + __ret_705; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u64(__p0_690, __p1_690, __p2_690) __extension__ ({ \ - uint32x4_t __ret_690; \ - uint32x2_t __s0_690 = __p0_690; \ - uint64x2_t __s1_690 = __p1_690; \ - __ret_690 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_690), (uint32x2_t)(vqrshrn_n_u64(__s1_690, __p2_690)))); \ - __ret_690; \ +#define vqrshrn_high_n_u64(__p0_706, __p1_706, __p2_706) __extension__ ({ \ + uint32x4_t __ret_706; \ + uint32x2_t __s0_706 = __p0_706; \ + uint64x2_t __s1_706 = __p1_706; \ + __ret_706 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_706), __builtin_bit_cast(uint32x2_t, vqrshrn_n_u64(__s1_706, __p2_706)))); \ + __ret_706; \ }) #else -#define vqrshrn_high_n_u64(__p0_691, __p1_691, __p2_691) __extension__ ({ \ - uint32x4_t __ret_691; \ - uint32x2_t __s0_691 = __p0_691; \ - uint64x2_t __s1_691 = __p1_691; \ - uint32x2_t __rev0_691; __rev0_691 = __builtin_shufflevector(__s0_691, __s0_691, 1, 0); \ - uint64x2_t __rev1_691; __rev1_691 = __builtin_shufflevector(__s1_691, __s1_691, 1, 0); \ - __ret_691 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_691), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_691, __p2_691)))); \ - __ret_691 = __builtin_shufflevector(__ret_691, __ret_691, 3, 2, 1, 0); \ - __ret_691; \ +#define vqrshrn_high_n_u64(__p0_707, __p1_707, __p2_707) __extension__ ({ \ + uint32x4_t __ret_707; \ + uint32x2_t __s0_707 = __p0_707; \ + uint64x2_t __s1_707 = __p1_707; \ + uint32x2_t __rev0_707; __rev0_707 = __builtin_shufflevector(__s0_707, __s0_707, __lane_reverse_64_32); \ + uint64x2_t __rev1_707; __rev1_707 = __builtin_shufflevector(__s1_707, __s1_707, __lane_reverse_128_64); \ + __ret_707 = __builtin_bit_cast(uint32x4_t, __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_707), __builtin_bit_cast(uint32x2_t, __noswap_vqrshrn_n_u64(__rev1_707, __p2_707)))); \ + __ret_707 = __builtin_shufflevector(__ret_707, __ret_707, __lane_reverse_128_32); \ + __ret_707; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u16(__p0_692, __p1_692, __p2_692) __extension__ ({ \ - uint8x16_t __ret_692; \ - uint8x8_t __s0_692 = __p0_692; \ - 
uint16x8_t __s1_692 = __p1_692; \ - __ret_692 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_692), (uint8x8_t)(vqrshrn_n_u16(__s1_692, __p2_692)))); \ - __ret_692; \ +#define vqrshrn_high_n_u16(__p0_708, __p1_708, __p2_708) __extension__ ({ \ + uint8x16_t __ret_708; \ + uint8x8_t __s0_708 = __p0_708; \ + uint16x8_t __s1_708 = __p1_708; \ + __ret_708 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, __s0_708), __builtin_bit_cast(uint8x8_t, vqrshrn_n_u16(__s1_708, __p2_708)))); \ + __ret_708; \ }) #else -#define vqrshrn_high_n_u16(__p0_693, __p1_693, __p2_693) __extension__ ({ \ - uint8x16_t __ret_693; \ - uint8x8_t __s0_693 = __p0_693; \ - uint16x8_t __s1_693 = __p1_693; \ - uint8x8_t __rev0_693; __rev0_693 = __builtin_shufflevector(__s0_693, __s0_693, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_693; __rev1_693 = __builtin_shufflevector(__s1_693, __s1_693, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_693 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_693), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_693, __p2_693)))); \ - __ret_693 = __builtin_shufflevector(__ret_693, __ret_693, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_693; \ +#define vqrshrn_high_n_u16(__p0_709, __p1_709, __p2_709) __extension__ ({ \ + uint8x16_t __ret_709; \ + uint8x8_t __s0_709 = __p0_709; \ + uint16x8_t __s1_709 = __p1_709; \ + uint8x8_t __rev0_709; __rev0_709 = __builtin_shufflevector(__s0_709, __s0_709, __lane_reverse_64_8); \ + uint16x8_t __rev1_709; __rev1_709 = __builtin_shufflevector(__s1_709, __s1_709, __lane_reverse_128_16); \ + __ret_709 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_709), __builtin_bit_cast(uint8x8_t, __noswap_vqrshrn_n_u16(__rev1_709, __p2_709)))); \ + __ret_709 = __builtin_shufflevector(__ret_709, __ret_709, __lane_reverse_128_8); \ + __ret_709; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s32(__p0_694, __p1_694, __p2_694) __extension__ ({ \ - int16x8_t __ret_694; \ - int16x4_t __s0_694 = __p0_694; \ - int32x4_t __s1_694 = __p1_694; \ - __ret_694 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_694), (int16x4_t)(vqrshrn_n_s32(__s1_694, __p2_694)))); \ - __ret_694; \ +#define vqrshrn_high_n_s32(__p0_710, __p1_710, __p2_710) __extension__ ({ \ + int16x8_t __ret_710; \ + int16x4_t __s0_710 = __p0_710; \ + int32x4_t __s1_710 = __p1_710; \ + __ret_710 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_710), __builtin_bit_cast(int16x4_t, vqrshrn_n_s32(__s1_710, __p2_710)))); \ + __ret_710; \ }) #else -#define vqrshrn_high_n_s32(__p0_695, __p1_695, __p2_695) __extension__ ({ \ - int16x8_t __ret_695; \ - int16x4_t __s0_695 = __p0_695; \ - int32x4_t __s1_695 = __p1_695; \ - int16x4_t __rev0_695; __rev0_695 = __builtin_shufflevector(__s0_695, __s0_695, 3, 2, 1, 0); \ - int32x4_t __rev1_695; __rev1_695 = __builtin_shufflevector(__s1_695, __s1_695, 3, 2, 1, 0); \ - __ret_695 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_695), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_695, __p2_695)))); \ - __ret_695 = __builtin_shufflevector(__ret_695, __ret_695, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_695; \ +#define vqrshrn_high_n_s32(__p0_711, __p1_711, __p2_711) __extension__ ({ \ + int16x8_t __ret_711; \ + int16x4_t __s0_711 = __p0_711; \ + int32x4_t __s1_711 = __p1_711; \ + int16x4_t __rev0_711; __rev0_711 = __builtin_shufflevector(__s0_711, __s0_711, __lane_reverse_64_16); \ + int32x4_t __rev1_711; __rev1_711 = __builtin_shufflevector(__s1_711, __s1_711, __lane_reverse_128_32); \ + 
__ret_711 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_711), __builtin_bit_cast(int16x4_t, __noswap_vqrshrn_n_s32(__rev1_711, __p2_711)))); \ + __ret_711 = __builtin_shufflevector(__ret_711, __ret_711, __lane_reverse_128_16); \ + __ret_711; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s64(__p0_696, __p1_696, __p2_696) __extension__ ({ \ - int32x4_t __ret_696; \ - int32x2_t __s0_696 = __p0_696; \ - int64x2_t __s1_696 = __p1_696; \ - __ret_696 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_696), (int32x2_t)(vqrshrn_n_s64(__s1_696, __p2_696)))); \ - __ret_696; \ +#define vqrshrn_high_n_s64(__p0_712, __p1_712, __p2_712) __extension__ ({ \ + int32x4_t __ret_712; \ + int32x2_t __s0_712 = __p0_712; \ + int64x2_t __s1_712 = __p1_712; \ + __ret_712 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_712), __builtin_bit_cast(int32x2_t, vqrshrn_n_s64(__s1_712, __p2_712)))); \ + __ret_712; \ }) #else -#define vqrshrn_high_n_s64(__p0_697, __p1_697, __p2_697) __extension__ ({ \ - int32x4_t __ret_697; \ - int32x2_t __s0_697 = __p0_697; \ - int64x2_t __s1_697 = __p1_697; \ - int32x2_t __rev0_697; __rev0_697 = __builtin_shufflevector(__s0_697, __s0_697, 1, 0); \ - int64x2_t __rev1_697; __rev1_697 = __builtin_shufflevector(__s1_697, __s1_697, 1, 0); \ - __ret_697 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_697), (int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_697, __p2_697)))); \ - __ret_697 = __builtin_shufflevector(__ret_697, __ret_697, 3, 2, 1, 0); \ - __ret_697; \ +#define vqrshrn_high_n_s64(__p0_713, __p1_713, __p2_713) __extension__ ({ \ + int32x4_t __ret_713; \ + int32x2_t __s0_713 = __p0_713; \ + int64x2_t __s1_713 = __p1_713; \ + int32x2_t __rev0_713; __rev0_713 = __builtin_shufflevector(__s0_713, __s0_713, __lane_reverse_64_32); \ + int64x2_t __rev1_713; __rev1_713 = __builtin_shufflevector(__s1_713, __s1_713, __lane_reverse_128_64); \ + __ret_713 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_713), __builtin_bit_cast(int32x2_t, __noswap_vqrshrn_n_s64(__rev1_713, __p2_713)))); \ + __ret_713 = __builtin_shufflevector(__ret_713, __ret_713, __lane_reverse_128_32); \ + __ret_713; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s16(__p0_698, __p1_698, __p2_698) __extension__ ({ \ - int8x16_t __ret_698; \ - int8x8_t __s0_698 = __p0_698; \ - int16x8_t __s1_698 = __p1_698; \ - __ret_698 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_698), (int8x8_t)(vqrshrn_n_s16(__s1_698, __p2_698)))); \ - __ret_698; \ +#define vqrshrn_high_n_s16(__p0_714, __p1_714, __p2_714) __extension__ ({ \ + int8x16_t __ret_714; \ + int8x8_t __s0_714 = __p0_714; \ + int16x8_t __s1_714 = __p1_714; \ + __ret_714 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_714), __builtin_bit_cast(int8x8_t, vqrshrn_n_s16(__s1_714, __p2_714)))); \ + __ret_714; \ }) #else -#define vqrshrn_high_n_s16(__p0_699, __p1_699, __p2_699) __extension__ ({ \ - int8x16_t __ret_699; \ - int8x8_t __s0_699 = __p0_699; \ - int16x8_t __s1_699 = __p1_699; \ - int8x8_t __rev0_699; __rev0_699 = __builtin_shufflevector(__s0_699, __s0_699, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_699; __rev1_699 = __builtin_shufflevector(__s1_699, __s1_699, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_699 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_699), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_699, __p2_699)))); \ - __ret_699 = __builtin_shufflevector(__ret_699, __ret_699, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 
4, 3, 2, 1, 0); \ - __ret_699; \ +#define vqrshrn_high_n_s16(__p0_715, __p1_715, __p2_715) __extension__ ({ \ + int8x16_t __ret_715; \ + int8x8_t __s0_715 = __p0_715; \ + int16x8_t __s1_715 = __p1_715; \ + int8x8_t __rev0_715; __rev0_715 = __builtin_shufflevector(__s0_715, __s0_715, __lane_reverse_64_8); \ + int16x8_t __rev1_715; __rev1_715 = __builtin_shufflevector(__s1_715, __s1_715, __lane_reverse_128_16); \ + __ret_715 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_715), __builtin_bit_cast(int8x8_t, __noswap_vqrshrn_n_s16(__rev1_715, __p2_715)))); \ + __ret_715 = __builtin_shufflevector(__ret_715, __ret_715, __lane_reverse_128_8); \ + __ret_715; \ }) #endif #define vqrshrns_n_u32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint32_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vqrshrns_n_u32(__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqrshrns_n_u32(__s0, __p1)); \ __ret; \ }) #define vqrshrnd_n_u64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vqrshrnd_n_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqrshrnd_n_u64(__s0, __p1)); \ __ret; \ }) #define vqrshrnh_n_u16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint16_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vqrshrnh_n_u16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqrshrnh_n_u16(__s0, __p1)); \ __ret; \ }) #define vqrshrns_n_s32(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int32_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vqrshrns_n_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqrshrns_n_s32(__s0, __p1)); \ __ret; \ }) #define vqrshrnd_n_s64(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int64_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vqrshrnd_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqrshrnd_n_s64(__s0, __p1)); \ __ret; \ }) #define vqrshrnh_n_s16(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int16_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vqrshrnh_n_s16(__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqrshrnh_n_s16(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s32(__p0_700, __p1_700, __p2_700) __extension__ ({ \ - int16x8_t __ret_700; \ - int16x4_t __s0_700 = __p0_700; \ - int32x4_t __s1_700 = __p1_700; \ - __ret_700 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_700), (int16x4_t)(vqrshrun_n_s32(__s1_700, __p2_700)))); \ - __ret_700; \ +#define vqrshrun_high_n_s32(__p0_716, __p1_716, __p2_716) __extension__ ({ \ + int16x8_t __ret_716; \ + int16x4_t __s0_716 = __p0_716; \ + int32x4_t __s1_716 = __p1_716; \ + __ret_716 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_716), __builtin_bit_cast(int16x4_t, vqrshrun_n_s32(__s1_716, __p2_716)))); \ + __ret_716; \ }) #else -#define vqrshrun_high_n_s32(__p0_701, __p1_701, __p2_701) __extension__ ({ \ - int16x8_t __ret_701; \ - int16x4_t __s0_701 = __p0_701; \ - int32x4_t __s1_701 = __p1_701; \ - int16x4_t __rev0_701; __rev0_701 = __builtin_shufflevector(__s0_701, __s0_701, 3, 2, 1, 0); \ - int32x4_t __rev1_701; __rev1_701 = __builtin_shufflevector(__s1_701, __s1_701, 3, 2, 1, 0); \ - __ret_701 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_701), (int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_701, __p2_701)))); \ - __ret_701 = __builtin_shufflevector(__ret_701, __ret_701, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_701; \ +#define 
vqrshrun_high_n_s32(__p0_717, __p1_717, __p2_717) __extension__ ({ \ + int16x8_t __ret_717; \ + int16x4_t __s0_717 = __p0_717; \ + int32x4_t __s1_717 = __p1_717; \ + int16x4_t __rev0_717; __rev0_717 = __builtin_shufflevector(__s0_717, __s0_717, __lane_reverse_64_16); \ + int32x4_t __rev1_717; __rev1_717 = __builtin_shufflevector(__s1_717, __s1_717, __lane_reverse_128_32); \ + __ret_717 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_717), __builtin_bit_cast(int16x4_t, __noswap_vqrshrun_n_s32(__rev1_717, __p2_717)))); \ + __ret_717 = __builtin_shufflevector(__ret_717, __ret_717, __lane_reverse_128_16); \ + __ret_717; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s64(__p0_702, __p1_702, __p2_702) __extension__ ({ \ - int32x4_t __ret_702; \ - int32x2_t __s0_702 = __p0_702; \ - int64x2_t __s1_702 = __p1_702; \ - __ret_702 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_702), (int32x2_t)(vqrshrun_n_s64(__s1_702, __p2_702)))); \ - __ret_702; \ +#define vqrshrun_high_n_s64(__p0_718, __p1_718, __p2_718) __extension__ ({ \ + int32x4_t __ret_718; \ + int32x2_t __s0_718 = __p0_718; \ + int64x2_t __s1_718 = __p1_718; \ + __ret_718 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_718), __builtin_bit_cast(int32x2_t, vqrshrun_n_s64(__s1_718, __p2_718)))); \ + __ret_718; \ }) #else -#define vqrshrun_high_n_s64(__p0_703, __p1_703, __p2_703) __extension__ ({ \ - int32x4_t __ret_703; \ - int32x2_t __s0_703 = __p0_703; \ - int64x2_t __s1_703 = __p1_703; \ - int32x2_t __rev0_703; __rev0_703 = __builtin_shufflevector(__s0_703, __s0_703, 1, 0); \ - int64x2_t __rev1_703; __rev1_703 = __builtin_shufflevector(__s1_703, __s1_703, 1, 0); \ - __ret_703 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_703), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_703, __p2_703)))); \ - __ret_703 = __builtin_shufflevector(__ret_703, __ret_703, 3, 2, 1, 0); \ - __ret_703; \ +#define vqrshrun_high_n_s64(__p0_719, __p1_719, __p2_719) __extension__ ({ \ + int32x4_t __ret_719; \ + int32x2_t __s0_719 = __p0_719; \ + int64x2_t __s1_719 = __p1_719; \ + int32x2_t __rev0_719; __rev0_719 = __builtin_shufflevector(__s0_719, __s0_719, __lane_reverse_64_32); \ + int64x2_t __rev1_719; __rev1_719 = __builtin_shufflevector(__s1_719, __s1_719, __lane_reverse_128_64); \ + __ret_719 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_719), __builtin_bit_cast(int32x2_t, __noswap_vqrshrun_n_s64(__rev1_719, __p2_719)))); \ + __ret_719 = __builtin_shufflevector(__ret_719, __ret_719, __lane_reverse_128_32); \ + __ret_719; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s16(__p0_704, __p1_704, __p2_704) __extension__ ({ \ - int8x16_t __ret_704; \ - int8x8_t __s0_704 = __p0_704; \ - int16x8_t __s1_704 = __p1_704; \ - __ret_704 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_704), (int8x8_t)(vqrshrun_n_s16(__s1_704, __p2_704)))); \ - __ret_704; \ +#define vqrshrun_high_n_s16(__p0_720, __p1_720, __p2_720) __extension__ ({ \ + int8x16_t __ret_720; \ + int8x8_t __s0_720 = __p0_720; \ + int16x8_t __s1_720 = __p1_720; \ + __ret_720 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_720), __builtin_bit_cast(int8x8_t, vqrshrun_n_s16(__s1_720, __p2_720)))); \ + __ret_720; \ }) #else -#define vqrshrun_high_n_s16(__p0_705, __p1_705, __p2_705) __extension__ ({ \ - int8x16_t __ret_705; \ - int8x8_t __s0_705 = __p0_705; \ - int16x8_t __s1_705 = __p1_705; \ - int8x8_t __rev0_705; __rev0_705 = 
__builtin_shufflevector(__s0_705, __s0_705, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_705; __rev1_705 = __builtin_shufflevector(__s1_705, __s1_705, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_705 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_705), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_705, __p2_705)))); \ - __ret_705 = __builtin_shufflevector(__ret_705, __ret_705, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_705; \ +#define vqrshrun_high_n_s16(__p0_721, __p1_721, __p2_721) __extension__ ({ \ + int8x16_t __ret_721; \ + int8x8_t __s0_721 = __p0_721; \ + int16x8_t __s1_721 = __p1_721; \ + int8x8_t __rev0_721; __rev0_721 = __builtin_shufflevector(__s0_721, __s0_721, __lane_reverse_64_8); \ + int16x8_t __rev1_721; __rev1_721 = __builtin_shufflevector(__s1_721, __s1_721, __lane_reverse_128_16); \ + __ret_721 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_721), __builtin_bit_cast(int8x8_t, __noswap_vqrshrun_n_s16(__rev1_721, __p2_721)))); \ + __ret_721 = __builtin_shufflevector(__ret_721, __ret_721, __lane_reverse_128_8); \ + __ret_721; \ }) #endif #define vqrshruns_n_s32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ int32_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vqrshruns_n_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqrshruns_n_s32(__s0, __p1)); \ __ret; \ }) #define vqrshrund_n_s64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ int64_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vqrshrund_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqrshrund_n_s64(__s0, __p1)); \ __ret; \ }) #define vqrshrunh_n_s16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ int16_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqrshrunh_n_s16(__s0, __p1)); \ __ret; \ }) __ai __attribute__((target("neon"))) uint8_t vqshlb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqshlb_u8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vqshls_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqshls_u32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vqshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vqshld_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint16_t vqshlh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqshlh_u16(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int8_t vqshlb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqshlb_s8(__p0, __p1); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqshlb_s8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int32_t vqshls_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqshls_s32(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqshls_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vqshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) 
__builtin_neon_vqshld_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqshld_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqshlh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqshlh_s16(__p0, __p1)); return __ret; } #define vqshlb_n_u8(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint8_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vqshlb_n_u8(__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqshlb_n_u8(__s0, __p1)); \ __ret; \ }) #define vqshls_n_u32(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint32_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vqshls_n_u32(__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqshls_n_u32(__s0, __p1)); \ __ret; \ }) #define vqshld_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vqshld_n_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vqshld_n_u64(__s0, __p1)); \ __ret; \ }) #define vqshlh_n_u16(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint16_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vqshlh_n_u16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqshlh_n_u16(__s0, __p1)); \ __ret; \ }) #define vqshlb_n_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vqshlb_n_s8(__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqshlb_n_s8(__s0, __p1)); \ __ret; \ }) #define vqshls_n_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vqshls_n_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqshls_n_s32(__s0, __p1)); \ __ret; \ }) #define vqshld_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vqshld_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqshld_n_s64(__s0, __p1)); \ __ret; \ }) #define vqshlh_n_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vqshlh_n_s16(__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqshlh_n_s16(__s0, __p1)); \ __ret; \ }) #define vqshlub_n_s8(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int8_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vqshlub_n_s8(__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqshlub_n_s8(__s0, __p1)); \ __ret; \ }) #define vqshlus_n_s32(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int32_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vqshlus_n_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqshlus_n_s32(__s0, __p1)); \ __ret; \ }) #define vqshlud_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vqshlud_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqshlud_n_s64(__s0, __p1)); \ __ret; \ }) #define vqshluh_n_s16(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int16_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vqshluh_n_s16(__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqshluh_n_s16(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u32(__p0_706, __p1_706, __p2_706) __extension__ ({ \ - uint16x8_t __ret_706; \ - uint16x4_t __s0_706 = __p0_706; \ - 
uint32x4_t __s1_706 = __p1_706; \ - __ret_706 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_706), (uint16x4_t)(vqshrn_n_u32(__s1_706, __p2_706)))); \ - __ret_706; \ +#define vqshrn_high_n_u32(__p0_722, __p1_722, __p2_722) __extension__ ({ \ + uint16x8_t __ret_722; \ + uint16x4_t __s0_722 = __p0_722; \ + uint32x4_t __s1_722 = __p1_722; \ + __ret_722 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_722), __builtin_bit_cast(uint16x4_t, vqshrn_n_u32(__s1_722, __p2_722)))); \ + __ret_722; \ }) #else -#define vqshrn_high_n_u32(__p0_707, __p1_707, __p2_707) __extension__ ({ \ - uint16x8_t __ret_707; \ - uint16x4_t __s0_707 = __p0_707; \ - uint32x4_t __s1_707 = __p1_707; \ - uint16x4_t __rev0_707; __rev0_707 = __builtin_shufflevector(__s0_707, __s0_707, 3, 2, 1, 0); \ - uint32x4_t __rev1_707; __rev1_707 = __builtin_shufflevector(__s1_707, __s1_707, 3, 2, 1, 0); \ - __ret_707 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_707), (uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_707, __p2_707)))); \ - __ret_707 = __builtin_shufflevector(__ret_707, __ret_707, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_707; \ +#define vqshrn_high_n_u32(__p0_723, __p1_723, __p2_723) __extension__ ({ \ + uint16x8_t __ret_723; \ + uint16x4_t __s0_723 = __p0_723; \ + uint32x4_t __s1_723 = __p1_723; \ + uint16x4_t __rev0_723; __rev0_723 = __builtin_shufflevector(__s0_723, __s0_723, __lane_reverse_64_16); \ + uint32x4_t __rev1_723; __rev1_723 = __builtin_shufflevector(__s1_723, __s1_723, __lane_reverse_128_32); \ + __ret_723 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_723), __builtin_bit_cast(uint16x4_t, __noswap_vqshrn_n_u32(__rev1_723, __p2_723)))); \ + __ret_723 = __builtin_shufflevector(__ret_723, __ret_723, __lane_reverse_128_16); \ + __ret_723; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u64(__p0_708, __p1_708, __p2_708) __extension__ ({ \ - uint32x4_t __ret_708; \ - uint32x2_t __s0_708 = __p0_708; \ - uint64x2_t __s1_708 = __p1_708; \ - __ret_708 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_708), (uint32x2_t)(vqshrn_n_u64(__s1_708, __p2_708)))); \ - __ret_708; \ +#define vqshrn_high_n_u64(__p0_724, __p1_724, __p2_724) __extension__ ({ \ + uint32x4_t __ret_724; \ + uint32x2_t __s0_724 = __p0_724; \ + uint64x2_t __s1_724 = __p1_724; \ + __ret_724 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_724), __builtin_bit_cast(uint32x2_t, vqshrn_n_u64(__s1_724, __p2_724)))); \ + __ret_724; \ }) #else -#define vqshrn_high_n_u64(__p0_709, __p1_709, __p2_709) __extension__ ({ \ - uint32x4_t __ret_709; \ - uint32x2_t __s0_709 = __p0_709; \ - uint64x2_t __s1_709 = __p1_709; \ - uint32x2_t __rev0_709; __rev0_709 = __builtin_shufflevector(__s0_709, __s0_709, 1, 0); \ - uint64x2_t __rev1_709; __rev1_709 = __builtin_shufflevector(__s1_709, __s1_709, 1, 0); \ - __ret_709 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_709), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_709, __p2_709)))); \ - __ret_709 = __builtin_shufflevector(__ret_709, __ret_709, 3, 2, 1, 0); \ - __ret_709; \ +#define vqshrn_high_n_u64(__p0_725, __p1_725, __p2_725) __extension__ ({ \ + uint32x4_t __ret_725; \ + uint32x2_t __s0_725 = __p0_725; \ + uint64x2_t __s1_725 = __p1_725; \ + uint32x2_t __rev0_725; __rev0_725 = __builtin_shufflevector(__s0_725, __s0_725, __lane_reverse_64_32); \ + uint64x2_t __rev1_725; __rev1_725 = __builtin_shufflevector(__s1_725, __s1_725, __lane_reverse_128_64); \ + __ret_725 = __builtin_bit_cast(uint32x4_t, 
__noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_725), __builtin_bit_cast(uint32x2_t, __noswap_vqshrn_n_u64(__rev1_725, __p2_725)))); \ + __ret_725 = __builtin_shufflevector(__ret_725, __ret_725, __lane_reverse_128_32); \ + __ret_725; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u16(__p0_710, __p1_710, __p2_710) __extension__ ({ \ - uint8x16_t __ret_710; \ - uint8x8_t __s0_710 = __p0_710; \ - uint16x8_t __s1_710 = __p1_710; \ - __ret_710 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_710), (uint8x8_t)(vqshrn_n_u16(__s1_710, __p2_710)))); \ - __ret_710; \ +#define vqshrn_high_n_u16(__p0_726, __p1_726, __p2_726) __extension__ ({ \ + uint8x16_t __ret_726; \ + uint8x8_t __s0_726 = __p0_726; \ + uint16x8_t __s1_726 = __p1_726; \ + __ret_726 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, __s0_726), __builtin_bit_cast(uint8x8_t, vqshrn_n_u16(__s1_726, __p2_726)))); \ + __ret_726; \ }) #else -#define vqshrn_high_n_u16(__p0_711, __p1_711, __p2_711) __extension__ ({ \ - uint8x16_t __ret_711; \ - uint8x8_t __s0_711 = __p0_711; \ - uint16x8_t __s1_711 = __p1_711; \ - uint8x8_t __rev0_711; __rev0_711 = __builtin_shufflevector(__s0_711, __s0_711, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_711; __rev1_711 = __builtin_shufflevector(__s1_711, __s1_711, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_711 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_711), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_711, __p2_711)))); \ - __ret_711 = __builtin_shufflevector(__ret_711, __ret_711, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_711; \ +#define vqshrn_high_n_u16(__p0_727, __p1_727, __p2_727) __extension__ ({ \ + uint8x16_t __ret_727; \ + uint8x8_t __s0_727 = __p0_727; \ + uint16x8_t __s1_727 = __p1_727; \ + uint8x8_t __rev0_727; __rev0_727 = __builtin_shufflevector(__s0_727, __s0_727, __lane_reverse_64_8); \ + uint16x8_t __rev1_727; __rev1_727 = __builtin_shufflevector(__s1_727, __s1_727, __lane_reverse_128_16); \ + __ret_727 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_727), __builtin_bit_cast(uint8x8_t, __noswap_vqshrn_n_u16(__rev1_727, __p2_727)))); \ + __ret_727 = __builtin_shufflevector(__ret_727, __ret_727, __lane_reverse_128_8); \ + __ret_727; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s32(__p0_712, __p1_712, __p2_712) __extension__ ({ \ - int16x8_t __ret_712; \ - int16x4_t __s0_712 = __p0_712; \ - int32x4_t __s1_712 = __p1_712; \ - __ret_712 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_712), (int16x4_t)(vqshrn_n_s32(__s1_712, __p2_712)))); \ - __ret_712; \ +#define vqshrn_high_n_s32(__p0_728, __p1_728, __p2_728) __extension__ ({ \ + int16x8_t __ret_728; \ + int16x4_t __s0_728 = __p0_728; \ + int32x4_t __s1_728 = __p1_728; \ + __ret_728 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_728), __builtin_bit_cast(int16x4_t, vqshrn_n_s32(__s1_728, __p2_728)))); \ + __ret_728; \ }) #else -#define vqshrn_high_n_s32(__p0_713, __p1_713, __p2_713) __extension__ ({ \ - int16x8_t __ret_713; \ - int16x4_t __s0_713 = __p0_713; \ - int32x4_t __s1_713 = __p1_713; \ - int16x4_t __rev0_713; __rev0_713 = __builtin_shufflevector(__s0_713, __s0_713, 3, 2, 1, 0); \ - int32x4_t __rev1_713; __rev1_713 = __builtin_shufflevector(__s1_713, __s1_713, 3, 2, 1, 0); \ - __ret_713 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_713), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_713, __p2_713)))); \ - __ret_713 = __builtin_shufflevector(__ret_713, __ret_713, 7, 6, 5, 4, 3, 2, 1, 
0); \ - __ret_713; \ +#define vqshrn_high_n_s32(__p0_729, __p1_729, __p2_729) __extension__ ({ \ + int16x8_t __ret_729; \ + int16x4_t __s0_729 = __p0_729; \ + int32x4_t __s1_729 = __p1_729; \ + int16x4_t __rev0_729; __rev0_729 = __builtin_shufflevector(__s0_729, __s0_729, __lane_reverse_64_16); \ + int32x4_t __rev1_729; __rev1_729 = __builtin_shufflevector(__s1_729, __s1_729, __lane_reverse_128_32); \ + __ret_729 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_729), __builtin_bit_cast(int16x4_t, __noswap_vqshrn_n_s32(__rev1_729, __p2_729)))); \ + __ret_729 = __builtin_shufflevector(__ret_729, __ret_729, __lane_reverse_128_16); \ + __ret_729; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s64(__p0_714, __p1_714, __p2_714) __extension__ ({ \ - int32x4_t __ret_714; \ - int32x2_t __s0_714 = __p0_714; \ - int64x2_t __s1_714 = __p1_714; \ - __ret_714 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_714), (int32x2_t)(vqshrn_n_s64(__s1_714, __p2_714)))); \ - __ret_714; \ +#define vqshrn_high_n_s64(__p0_730, __p1_730, __p2_730) __extension__ ({ \ + int32x4_t __ret_730; \ + int32x2_t __s0_730 = __p0_730; \ + int64x2_t __s1_730 = __p1_730; \ + __ret_730 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_730), __builtin_bit_cast(int32x2_t, vqshrn_n_s64(__s1_730, __p2_730)))); \ + __ret_730; \ }) #else -#define vqshrn_high_n_s64(__p0_715, __p1_715, __p2_715) __extension__ ({ \ - int32x4_t __ret_715; \ - int32x2_t __s0_715 = __p0_715; \ - int64x2_t __s1_715 = __p1_715; \ - int32x2_t __rev0_715; __rev0_715 = __builtin_shufflevector(__s0_715, __s0_715, 1, 0); \ - int64x2_t __rev1_715; __rev1_715 = __builtin_shufflevector(__s1_715, __s1_715, 1, 0); \ - __ret_715 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_715), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_715, __p2_715)))); \ - __ret_715 = __builtin_shufflevector(__ret_715, __ret_715, 3, 2, 1, 0); \ - __ret_715; \ +#define vqshrn_high_n_s64(__p0_731, __p1_731, __p2_731) __extension__ ({ \ + int32x4_t __ret_731; \ + int32x2_t __s0_731 = __p0_731; \ + int64x2_t __s1_731 = __p1_731; \ + int32x2_t __rev0_731; __rev0_731 = __builtin_shufflevector(__s0_731, __s0_731, __lane_reverse_64_32); \ + int64x2_t __rev1_731; __rev1_731 = __builtin_shufflevector(__s1_731, __s1_731, __lane_reverse_128_64); \ + __ret_731 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_731), __builtin_bit_cast(int32x2_t, __noswap_vqshrn_n_s64(__rev1_731, __p2_731)))); \ + __ret_731 = __builtin_shufflevector(__ret_731, __ret_731, __lane_reverse_128_32); \ + __ret_731; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s16(__p0_716, __p1_716, __p2_716) __extension__ ({ \ - int8x16_t __ret_716; \ - int8x8_t __s0_716 = __p0_716; \ - int16x8_t __s1_716 = __p1_716; \ - __ret_716 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_716), (int8x8_t)(vqshrn_n_s16(__s1_716, __p2_716)))); \ - __ret_716; \ +#define vqshrn_high_n_s16(__p0_732, __p1_732, __p2_732) __extension__ ({ \ + int8x16_t __ret_732; \ + int8x8_t __s0_732 = __p0_732; \ + int16x8_t __s1_732 = __p1_732; \ + __ret_732 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_732), __builtin_bit_cast(int8x8_t, vqshrn_n_s16(__s1_732, __p2_732)))); \ + __ret_732; \ }) #else -#define vqshrn_high_n_s16(__p0_717, __p1_717, __p2_717) __extension__ ({ \ - int8x16_t __ret_717; \ - int8x8_t __s0_717 = __p0_717; \ - int16x8_t __s1_717 = __p1_717; \ - int8x8_t __rev0_717; __rev0_717 = 
__builtin_shufflevector(__s0_717, __s0_717, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_717; __rev1_717 = __builtin_shufflevector(__s1_717, __s1_717, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_717 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_717), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_717, __p2_717)))); \ - __ret_717 = __builtin_shufflevector(__ret_717, __ret_717, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_717; \ +#define vqshrn_high_n_s16(__p0_733, __p1_733, __p2_733) __extension__ ({ \ + int8x16_t __ret_733; \ + int8x8_t __s0_733 = __p0_733; \ + int16x8_t __s1_733 = __p1_733; \ + int8x8_t __rev0_733; __rev0_733 = __builtin_shufflevector(__s0_733, __s0_733, __lane_reverse_64_8); \ + int16x8_t __rev1_733; __rev1_733 = __builtin_shufflevector(__s1_733, __s1_733, __lane_reverse_128_16); \ + __ret_733 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_733), __builtin_bit_cast(int8x8_t, __noswap_vqshrn_n_s16(__rev1_733, __p2_733)))); \ + __ret_733 = __builtin_shufflevector(__ret_733, __ret_733, __lane_reverse_128_8); \ + __ret_733; \ }) #endif #define vqshrns_n_u32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ uint32_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vqshrns_n_u32(__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqshrns_n_u32(__s0, __p1)); \ __ret; \ }) #define vqshrnd_n_u64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vqshrnd_n_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqshrnd_n_u64(__s0, __p1)); \ __ret; \ }) #define vqshrnh_n_u16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ uint16_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vqshrnh_n_u16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqshrnh_n_u16(__s0, __p1)); \ __ret; \ }) #define vqshrns_n_s32(__p0, __p1) __extension__ ({ \ int16_t __ret; \ int32_t __s0 = __p0; \ - __ret = (int16_t) __builtin_neon_vqshrns_n_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqshrns_n_s32(__s0, __p1)); \ __ret; \ }) #define vqshrnd_n_s64(__p0, __p1) __extension__ ({ \ int32_t __ret; \ int64_t __s0 = __p0; \ - __ret = (int32_t) __builtin_neon_vqshrnd_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqshrnd_n_s64(__s0, __p1)); \ __ret; \ }) #define vqshrnh_n_s16(__p0, __p1) __extension__ ({ \ int8_t __ret; \ int16_t __s0 = __p0; \ - __ret = (int8_t) __builtin_neon_vqshrnh_n_s16(__s0, __p1); \ + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqshrnh_n_s16(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s32(__p0_718, __p1_718, __p2_718) __extension__ ({ \ - int16x8_t __ret_718; \ - int16x4_t __s0_718 = __p0_718; \ - int32x4_t __s1_718 = __p1_718; \ - __ret_718 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_718), (int16x4_t)(vqshrun_n_s32(__s1_718, __p2_718)))); \ - __ret_718; \ +#define vqshrun_high_n_s32(__p0_734, __p1_734, __p2_734) __extension__ ({ \ + int16x8_t __ret_734; \ + int16x4_t __s0_734 = __p0_734; \ + int32x4_t __s1_734 = __p1_734; \ + __ret_734 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_734), __builtin_bit_cast(int16x4_t, vqshrun_n_s32(__s1_734, __p2_734)))); \ + __ret_734; \ }) #else -#define vqshrun_high_n_s32(__p0_719, __p1_719, __p2_719) __extension__ ({ \ - int16x8_t __ret_719; \ - int16x4_t __s0_719 = __p0_719; \ - int32x4_t __s1_719 = __p1_719; \ - int16x4_t __rev0_719; __rev0_719 = 
__builtin_shufflevector(__s0_719, __s0_719, 3, 2, 1, 0); \ - int32x4_t __rev1_719; __rev1_719 = __builtin_shufflevector(__s1_719, __s1_719, 3, 2, 1, 0); \ - __ret_719 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_719), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_719, __p2_719)))); \ - __ret_719 = __builtin_shufflevector(__ret_719, __ret_719, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_719; \ +#define vqshrun_high_n_s32(__p0_735, __p1_735, __p2_735) __extension__ ({ \ + int16x8_t __ret_735; \ + int16x4_t __s0_735 = __p0_735; \ + int32x4_t __s1_735 = __p1_735; \ + int16x4_t __rev0_735; __rev0_735 = __builtin_shufflevector(__s0_735, __s0_735, __lane_reverse_64_16); \ + int32x4_t __rev1_735; __rev1_735 = __builtin_shufflevector(__s1_735, __s1_735, __lane_reverse_128_32); \ + __ret_735 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_735), __builtin_bit_cast(int16x4_t, __noswap_vqshrun_n_s32(__rev1_735, __p2_735)))); \ + __ret_735 = __builtin_shufflevector(__ret_735, __ret_735, __lane_reverse_128_16); \ + __ret_735; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s64(__p0_720, __p1_720, __p2_720) __extension__ ({ \ - int32x4_t __ret_720; \ - int32x2_t __s0_720 = __p0_720; \ - int64x2_t __s1_720 = __p1_720; \ - __ret_720 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_720), (int32x2_t)(vqshrun_n_s64(__s1_720, __p2_720)))); \ - __ret_720; \ +#define vqshrun_high_n_s64(__p0_736, __p1_736, __p2_736) __extension__ ({ \ + int32x4_t __ret_736; \ + int32x2_t __s0_736 = __p0_736; \ + int64x2_t __s1_736 = __p1_736; \ + __ret_736 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_736), __builtin_bit_cast(int32x2_t, vqshrun_n_s64(__s1_736, __p2_736)))); \ + __ret_736; \ }) #else -#define vqshrun_high_n_s64(__p0_721, __p1_721, __p2_721) __extension__ ({ \ - int32x4_t __ret_721; \ - int32x2_t __s0_721 = __p0_721; \ - int64x2_t __s1_721 = __p1_721; \ - int32x2_t __rev0_721; __rev0_721 = __builtin_shufflevector(__s0_721, __s0_721, 1, 0); \ - int64x2_t __rev1_721; __rev1_721 = __builtin_shufflevector(__s1_721, __s1_721, 1, 0); \ - __ret_721 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_721), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_721, __p2_721)))); \ - __ret_721 = __builtin_shufflevector(__ret_721, __ret_721, 3, 2, 1, 0); \ - __ret_721; \ +#define vqshrun_high_n_s64(__p0_737, __p1_737, __p2_737) __extension__ ({ \ + int32x4_t __ret_737; \ + int32x2_t __s0_737 = __p0_737; \ + int64x2_t __s1_737 = __p1_737; \ + int32x2_t __rev0_737; __rev0_737 = __builtin_shufflevector(__s0_737, __s0_737, __lane_reverse_64_32); \ + int64x2_t __rev1_737; __rev1_737 = __builtin_shufflevector(__s1_737, __s1_737, __lane_reverse_128_64); \ + __ret_737 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_737), __builtin_bit_cast(int32x2_t, __noswap_vqshrun_n_s64(__rev1_737, __p2_737)))); \ + __ret_737 = __builtin_shufflevector(__ret_737, __ret_737, __lane_reverse_128_32); \ + __ret_737; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s16(__p0_722, __p1_722, __p2_722) __extension__ ({ \ - int8x16_t __ret_722; \ - int8x8_t __s0_722 = __p0_722; \ - int16x8_t __s1_722 = __p1_722; \ - __ret_722 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_722), (int8x8_t)(vqshrun_n_s16(__s1_722, __p2_722)))); \ - __ret_722; \ +#define vqshrun_high_n_s16(__p0_738, __p1_738, __p2_738) __extension__ ({ \ + int8x16_t __ret_738; \ + int8x8_t __s0_738 = __p0_738; \ + int16x8_t __s1_738 = __p1_738; \ + __ret_738 = 
__builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_738), __builtin_bit_cast(int8x8_t, vqshrun_n_s16(__s1_738, __p2_738)))); \ + __ret_738; \ }) #else -#define vqshrun_high_n_s16(__p0_723, __p1_723, __p2_723) __extension__ ({ \ - int8x16_t __ret_723; \ - int8x8_t __s0_723 = __p0_723; \ - int16x8_t __s1_723 = __p1_723; \ - int8x8_t __rev0_723; __rev0_723 = __builtin_shufflevector(__s0_723, __s0_723, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_723; __rev1_723 = __builtin_shufflevector(__s1_723, __s1_723, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_723 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_723), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_723, __p2_723)))); \ - __ret_723 = __builtin_shufflevector(__ret_723, __ret_723, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_723; \ +#define vqshrun_high_n_s16(__p0_739, __p1_739, __p2_739) __extension__ ({ \ + int8x16_t __ret_739; \ + int8x8_t __s0_739 = __p0_739; \ + int16x8_t __s1_739 = __p1_739; \ + int8x8_t __rev0_739; __rev0_739 = __builtin_shufflevector(__s0_739, __s0_739, __lane_reverse_64_8); \ + int16x8_t __rev1_739; __rev1_739 = __builtin_shufflevector(__s1_739, __s1_739, __lane_reverse_128_16); \ + __ret_739 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_739), __builtin_bit_cast(int8x8_t, __noswap_vqshrun_n_s16(__rev1_739, __p2_739)))); \ + __ret_739 = __builtin_shufflevector(__ret_739, __ret_739, __lane_reverse_128_8); \ + __ret_739; \ }) #endif #define vqshruns_n_s32(__p0, __p1) __extension__ ({ \ uint16_t __ret; \ int32_t __s0 = __p0; \ - __ret = (uint16_t) __builtin_neon_vqshruns_n_s32(__s0, __p1); \ + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqshruns_n_s32(__s0, __p1)); \ __ret; \ }) #define vqshrund_n_s64(__p0, __p1) __extension__ ({ \ uint32_t __ret; \ int64_t __s0 = __p0; \ - __ret = (uint32_t) __builtin_neon_vqshrund_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqshrund_n_s64(__s0, __p1)); \ __ret; \ }) #define vqshrunh_n_s16(__p0, __p1) __extension__ ({ \ uint8_t __ret; \ int16_t __s0 = __p0; \ - __ret = (uint8_t) __builtin_neon_vqshrunh_n_s16(__s0, __p1); \ + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqshrunh_n_s16(__s0, __p1)); \ __ret; \ }) __ai __attribute__((target("neon"))) uint8_t vqsubb_u8(uint8_t __p0, uint8_t __p1) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vqsubb_u8(__p0, __p1); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vqsubb_u8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vqsubs_u32(uint32_t __p0, uint32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vqsubs_u32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vqsubs_u32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vqsubd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vqsubd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vqsubd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint16_t vqsubh_u16(uint16_t __p0, uint16_t __p1) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vqsubh_u16(__p0, __p1); + __ret = __builtin_bit_cast(uint16_t, __builtin_neon_vqsubh_u16(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int8_t vqsubb_s8(int8_t __p0, int8_t __p1) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vqsubb_s8(__p0, __p1); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vqsubb_s8(__p0, __p1)); return __ret; } 
__ai __attribute__((target("neon"))) int32_t vqsubs_s32(int32_t __p0, int32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqsubs_s32(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqsubs_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vqsubd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vqsubd_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vqsubd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int16_t vqsubh_s16(int16_t __p0, int16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqsubh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqsubh_s16(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58579,16 +60237,16 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbl1_p8(poly8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58596,16 +60254,16 @@ __ai 
__attribute__((target("neon"))) poly8x16_t vqtbl1q_p8(poly8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58613,16 +60271,33 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbl1q_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbl1q_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl1q_mf8(mfloat8x16_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl1q_mf8(mfloat8x16_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = 
__builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58630,16 +60305,16 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbl1q_s8(int8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58647,16 +60322,33 @@ __ai __attribute__((target("neon"))) uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbl1_s8(int8x16_t __p0, uint8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbl1_s8(int8x16_t __p0, uint8x8_t __p1) { int8x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbl1_mf8(mfloat8x16_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbl1_mf8(mfloat8x16_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbl1_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58664,18 +60356,18 @@ __ai __attribute__((target("neon"))) int8x8_t vqtbl1_s8(int8x16_t __p0, uint8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58683,18 +60375,18 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbl2_p8(poly8x16x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p1), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, uint8x16_t __p1) { poly8x16_t __ret; poly8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], 
__p0.val[1], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58702,18 +60394,18 @@ __ai __attribute__((target("neon"))) poly8x16_t vqtbl2q_p8(poly8x16x2_t __p0, ui #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58721,18 +60413,37 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, ui #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbl2q_s8(int8x16x2_t __p0, uint8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbl2q_s8(int8x16x2_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl2q_mf8(mfloat8x16x2_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p1), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl2q_mf8(mfloat8x16x2_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl2q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev1), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58740,18 +60451,18 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbl2q_s8(int8x16x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58759,18 +60470,37 @@ __ai 
__attribute__((target("neon"))) uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbl2_s8(int8x16x2_t __p0, uint8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbl2_s8(int8x16x2_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x16x2_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbl2_mf8(mfloat8x16x2_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbl2_mf8(mfloat8x16x2_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x16x2_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbl2_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58778,19 +60508,19 @@ __ai __attribute__((target("neon"))) int8x8_t vqtbl2_s8(int8x16x2_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, 
uint8x8_t __p1) { poly8x8_t __ret; poly8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58798,19 +60528,19 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbl3_p8(poly8x16x3_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p1), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, uint8x16_t __p1) { poly8x16_t __ret; poly8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), 
__builtin_bit_cast(int8x16_t, __rev1), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58818,19 +60548,19 @@ __ai __attribute__((target("neon"))) poly8x16_t vqtbl3q_p8(poly8x16x3_t __p0, ui #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58838,19 +60568,39 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, ui #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbl3q_s8(int8x16x3_t __p0, uint8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbl3q_s8(int8x16x3_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl3q_mf8(mfloat8x16x3_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p1), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl3q_mf8(mfloat8x16x3_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl3q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev1), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58858,19 +60608,19 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbl3q_s8(int8x16x3_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 
2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58878,19 +60628,39 @@ __ai __attribute__((target("neon"))) uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbl3_s8(int8x16x3_t __p0, uint8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbl3_s8(int8x16x3_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x16x3_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbl3_mf8(mfloat8x16x3_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t 
vqtbl3_mf8(mfloat8x16x3_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x16x3_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbl3_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58898,20 +60668,20 @@ __ai __attribute__((target("neon"))) int8x8_t vqtbl3_s8(int8x16x3_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; poly8x16x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -58919,20 +60689,20 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbl4_p8(poly8x16x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) { poly8x16_t __ret; - __ret 
= (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x16_t, __p1), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, uint8x16_t __p1) { poly8x16_t __ret; poly8x16x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x16_t, __rev1), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58940,20 +60710,20 @@ __ai __attribute__((target("neon"))) poly8x16_t vqtbl4q_p8(poly8x16x4_t __p0, ui #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) { uint8x16_t __ret; uint8x16x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 
1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58961,20 +60731,41 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, ui #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbl4q_s8(int8x16x4_t __p0, uint8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbl4q_s8(int8x16x4_t __p0, uint8x16_t __p1) { int8x16_t __ret; int8x16x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl4q_mf8(mfloat8x16x4_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x16_t, __p1), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbl4q_mf8(mfloat8x16x4_t __p0, uint8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbl4q_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x16_t, __rev1), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -58982,20 +60773,20 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbl4q_s8(int8x16x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; uint8x16x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + 
__rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59003,20 +60794,41 @@ __ai __attribute__((target("neon"))) uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbl4_s8(int8x16x4_t __p0, uint8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbl4_s8(int8x16x4_t __p0, uint8x8_t __p1) { int8x8_t __ret; int8x16x4_t __rev0; - __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbl4_mf8(mfloat8x16x4_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, 
__builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __p0.val[0]), __builtin_bit_cast(int8x16_t, __p0.val[1]), __builtin_bit_cast(int8x16_t, __p0.val[2]), __builtin_bit_cast(int8x16_t, __p0.val[3]), __builtin_bit_cast(int8x8_t, __p1), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbl4_mf8(mfloat8x16x4_t __p0, uint8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x16x4_t __rev0; + __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], __lane_reverse_128_8); + __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], __lane_reverse_128_8); + __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], __lane_reverse_128_8); + __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], __lane_reverse_128_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbl4_v(__builtin_bit_cast(int8x16_t, __rev0.val[0]), __builtin_bit_cast(int8x16_t, __rev0.val[1]), __builtin_bit_cast(int8x16_t, __rev0.val[2]), __builtin_bit_cast(int8x16_t, __rev0.val[3]), __builtin_bit_cast(int8x8_t, __rev1), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59024,17 +60836,17 @@ __ai __attribute__((target("neon"))) int8x8_t vqtbl4_s8(int8x16x4_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x16_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59042,17 +60854,17 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbx1_p8(poly8x8_t __p0, poly8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 36)); return __ret; } #else __ai 
__attribute__((target("neon"))) poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly8x16_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59060,17 +60872,17 @@ __ai __attribute__((target("neon"))) poly8x16_t vqtbx1q_p8(poly8x16_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59078,17 +60890,35 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, uint8x16_t __p2) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __p0), 
__builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, uint8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx1q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59096,17 +60926,17 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 16); - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59114,17 +60944,35 @@ __ai __attribute__((target("neon"))) uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, uint8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, uint8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbx1_mf8(mfloat8x8_t __p0, mfloat8x16_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbx1_mf8(mfloat8x8_t __p0, mfloat8x16_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx1_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59132,19 +60980,19 @@ __ai __attribute__((target("neon"))) int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbx2_p8(poly8x8_t __p0, 
poly8x16x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x16x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); poly8x16x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59152,19 +61000,19 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbx2_p8(poly8x8_t __p0, poly8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p2), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly8x16x2_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); poly8x16x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = 
__builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev2), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59172,19 +61020,19 @@ __ai __attribute__((target("neon"))) poly8x16_t vqtbx2q_p8(poly8x16_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p2), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); uint8x16x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev2), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59192,19 +61040,39 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, uint8x16_t __p2) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p2), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbx2q_s8(int8x16_t __p0, 
int8x16x2_t __p1, uint8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); int8x16x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev2), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx2q_mf8(mfloat8x16_t __p0, mfloat8x16x2_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p2), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx2q_mf8(mfloat8x16_t __p0, mfloat8x16x2_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx2q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev2), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59212,19 +61080,19 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t 
__p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); uint8x16x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59232,19 +61100,39 @@ __ai __attribute__((target("neon"))) uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, uint8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, uint8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); int8x16x2_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t 
vqtbx2_mf8(mfloat8x8_t __p0, mfloat8x16x2_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbx2_mf8(mfloat8x8_t __p0, mfloat8x16x2_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x16x2_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx2_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59252,20 +61140,20 @@ __ai __attribute__((target("neon"))) int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x16x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); poly8x16x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 
__builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59273,20 +61161,20 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbx3_p8(poly8x8_t __p0, poly8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p2), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly8x16x3_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); poly8x16x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev2), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59294,20 +61182,20 @@ __ai __attribute__((target("neon"))) poly8x16_t vqtbx3q_p8(poly8x16_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p2), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, 
uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); uint8x16x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev2), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59315,20 +61203,41 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, uint8x16_t __p2) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p2), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, uint8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); int8x16x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 32); - __ret = __builtin_shufflevector(__ret, __ret, 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev2), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx3q_mf8(mfloat8x16_t __p0, mfloat8x16x3_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p2), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx3q_mf8(mfloat8x16_t __p0, mfloat8x16x3_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx3q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev2), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59336,20 +61245,20 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); uint8x16x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 
3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59357,20 +61266,41 @@ __ai __attribute__((target("neon"))) uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, uint8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, uint8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); int8x16x3_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev2), 
0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbx3_mf8(mfloat8x8_t __p0, mfloat8x16x3_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbx3_mf8(mfloat8x8_t __p0, mfloat8x16x3_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x16x3_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx3_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59378,21 +61308,21 @@ __ai __attribute__((target("neon"))) int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x16x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); poly8x16x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = 
__builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59400,21 +61330,21 @@ __ai __attribute__((target("neon"))) poly8x8_t vqtbx4_p8(poly8x8_t __p0, poly8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x16_t, __p2), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly8x16x4_t __p1, uint8x16_t __p2) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); poly8x16x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, 
__rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x16_t, __rev2), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59422,21 +61352,21 @@ __ai __attribute__((target("neon"))) poly8x16_t vqtbx4q_p8(poly8x16_t __p0, poly #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x16_t, __p2), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); uint8x16x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x16_t, __rev2), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59444,21 +61374,43 @@ __ai __attribute__((target("neon"))) uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, uint8x16_t __p2) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], 
(int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x16_t, __p2), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, uint8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); int8x16x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x16_t, __rev2), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx4q_mf8(mfloat8x16_t __p0, mfloat8x16x4_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x16_t, __p2), 44)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vqtbx4q_mf8(mfloat8x16_t __p0, mfloat8x16x4_t __p1, uint8x16_t __p2) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16x4_t __rev1; + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + 
__rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(mfloat8x16_t, __builtin_neon_vqtbx4q_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x16_t, __rev2), 44)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59466,21 +61418,21 @@ __ai __attribute__((target("neon"))) int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); uint8x16x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59488,21 +61440,43 @@ __ai 
__attribute__((target("neon"))) uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, uint8x8_t __p2) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, uint8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); int8x16x4_t __rev1; - __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbx4_mf8(mfloat8x8_t __p0, mfloat8x16x4_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x16_t, __p1.val[0]), __builtin_bit_cast(int8x16_t, __p1.val[1]), __builtin_bit_cast(int8x16_t, __p1.val[2]), __builtin_bit_cast(int8x16_t, __p1.val[3]), __builtin_bit_cast(int8x8_t, __p2), 12)); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vqtbx4_mf8(mfloat8x8_t __p0, mfloat8x16x4_t __p1, uint8x8_t __p2) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x16x4_t __rev1; + 
__rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], __lane_reverse_128_8); + __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], __lane_reverse_128_8); + __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], __lane_reverse_128_8); + __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], __lane_reverse_128_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); + __ret = __builtin_bit_cast(mfloat8x8_t, __builtin_neon_vqtbx4_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __builtin_bit_cast(int8x8_t, __rev2), 12)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59516,11 +61490,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vraddhn_high_u32(uint16x4_t __p0 #else __ai __attribute__((target("neon"))) uint16x8_t vraddhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vcombine_u16(__rev0, __noswap_vraddhn_u32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -59534,11 +61508,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vraddhn_high_u64(uint32x2_t __p0 #else __ai __attribute__((target("neon"))) uint32x4_t vraddhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vcombine_u32(__rev0, __noswap_vraddhn_u64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -59552,11 +61526,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vraddhn_high_u16(uint8x8_t __p0, #else __ai __attribute__((target("neon"))) uint8x16_t vraddhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t 
__rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_u8(__rev0, __noswap_vraddhn_u16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59570,11 +61544,11 @@ __ai __attribute__((target("neon"))) int16x8_t vraddhn_high_s32(int16x4_t __p0, #else __ai __attribute__((target("neon"))) int16x8_t vraddhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vcombine_s16(__rev0, __noswap_vraddhn_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -59588,11 +61562,11 @@ __ai __attribute__((target("neon"))) int32x4_t vraddhn_high_s64(int32x2_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vraddhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vcombine_s32(__rev0, __noswap_vraddhn_s64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -59606,11 +61580,11 @@ __ai __attribute__((target("neon"))) int8x16_t vraddhn_high_s16(int8x8_t __p0, i #else __ai __attribute__((target("neon"))) int8x16_t vraddhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_s8(__rev0, __noswap_vraddhn_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59618,15 +61592,15 @@ __ai __attribute__((target("neon"))) int8x16_t vraddhn_high_s16(int8x8_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x8_t vrbit_p8(poly8x8_t __p0) { 
poly8x8_t __ret; - __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 4); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vrbit_v(__builtin_bit_cast(int8x8_t, __p0), 4)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x8_t vrbit_p8(poly8x8_t __p0) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 4); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(poly8x8_t, __builtin_neon_vrbit_v(__builtin_bit_cast(int8x8_t, __rev0), 4)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59634,15 +61608,15 @@ __ai __attribute__((target("neon"))) poly8x8_t vrbit_p8(poly8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) poly8x16_t vrbitq_p8(poly8x16_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 36); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vrbitq_v(__builtin_bit_cast(int8x16_t, __p0), 36)); return __ret; } #else __ai __attribute__((target("neon"))) poly8x16_t vrbitq_p8(poly8x16_t __p0) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (poly8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 36); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(poly8x16_t, __builtin_neon_vrbitq_v(__builtin_bit_cast(int8x16_t, __rev0), 36)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59650,15 +61624,15 @@ __ai __attribute__((target("neon"))) poly8x16_t vrbitq_p8(poly8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vrbitq_u8(uint8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrbitq_v(__builtin_bit_cast(int8x16_t, __p0), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vrbitq_u8(uint8x16_t __p0) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vrbitq_v(__builtin_bit_cast(int8x16_t, __rev0), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59666,15 +61640,15 @@ __ai __attribute__((target("neon"))) uint8x16_t vrbitq_u8(uint8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vrbitq_s8(int8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__p0, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrbitq_v(__builtin_bit_cast(int8x16_t, __p0), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vrbitq_s8(int8x16_t __p0) { int8x16_t __ret; - int8x16_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vrbitq_v((int8x16_t)__rev0, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vrbitq_v(__builtin_bit_cast(int8x16_t, __rev0), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -59682,15 +61656,15 @@ __ai __attribute__((target("neon"))) int8x16_t vrbitq_s8(int8x16_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vrbit_u8(uint8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrbit_v(__builtin_bit_cast(int8x8_t, __p0), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vrbit_u8(uint8x8_t __p0) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vrbit_v(__builtin_bit_cast(int8x8_t, __rev0), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59698,15 +61672,15 @@ __ai __attribute__((target("neon"))) uint8x8_t vrbit_u8(uint8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vrbit_s8(int8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__p0, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrbit_v(__builtin_bit_cast(int8x8_t, __p0), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vrbit_s8(int8x8_t __p0) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vrbit_v((int8x8_t)__rev0, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vrbit_v(__builtin_bit_cast(int8x8_t, __rev0), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -59714,2549 +61688,2549 @@ __ai __attribute__((target("neon"))) int8x8_t vrbit_s8(int8x8_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrecpeq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrecpeq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrecpeq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrecpeq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrecpeq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrecpe_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrecpe_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } __ai __attribute__((target("neon"))) float64_t vrecped_f64(float64_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecped_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vrecped_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) float32_t vrecpes_f32(float32_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpes_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vrecpes_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrecpsq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrecpsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrecpsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrecpsq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrecps_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrecps_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } __ai __attribute__((target("neon"))) float64_t vrecpsd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecpsd_f64(__p0, __p1); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vrecpsd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) float32_t vrecpss_f32(float32_t __p0, float32_t __p1) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpss_f32(__p0, __p1); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vrecpss_f32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) float64_t vrecpxd_f64(float64_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vrecpxd_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vrecpxd_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) float32_t vrecpxs_f32(float32_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vrecpxs_f32(__p0)); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t 
vreinterpret_p8_p64(poly64x1_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_f64(float64x1_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_mf8(mfloat8x8_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; - __ret = (poly8x8_t)(__p0); + __ret = __builtin_bit_cast(poly8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_p8(poly8x8_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_p16(poly16x4_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_u8(uint8x8_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_u32(uint32x2_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_u64(uint64x1_t 
__p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_u16(uint16x4_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_s8(int8x8_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_f64(float64x1_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_f32(float32x2_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_f16(float16x4_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_s32(int32x2_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_s64(int64x1_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_mf8(mfloat8x8_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) { poly64x1_t __ret; - __ret = (poly64x1_t)(__p0); + __ret = __builtin_bit_cast(poly64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_p64(poly64x1_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_f64(float64x1_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) 
poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_mf8(mfloat8x8_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; - __ret = (poly16x4_t)(__p0); + __ret = __builtin_bit_cast(poly16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_p128(poly128_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_p64(poly64x2_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_f64(float64x2_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_f16(float16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return 
__ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_mf8(mfloat8x16_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; - __ret = (poly8x16_t)(__p0); + __ret = __builtin_bit_cast(poly8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_p8(poly8x16_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_p64(poly64x2_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_p16(poly16x8_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_u8(uint8x16_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_u32(uint32x4_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_u64(uint64x2_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_u16(uint16x8_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_s8(int8x16_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_f64(float64x2_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_f32(float32x4_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_f16(float16x8_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_s32(int32x4_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_s64(int64x2_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_mf8(mfloat8x16_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_s16(int16x8_t __p0) { poly128_t __ret; - __ret = (poly128_t)(__p0); + __ret = __builtin_bit_cast(poly128_t, 
__p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_p8(poly8x16_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_p128(poly128_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_p16(poly16x8_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_u8(uint8x16_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_u32(uint32x4_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_u64(uint64x2_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_u16(uint16x8_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_s8(int8x16_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_f64(float64x2_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_f32(float32x4_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_f16(float16x8_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_s32(int32x4_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_s64(int64x2_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_mf8(mfloat8x16_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { poly64x2_t __ret; - __ret = (poly64x2_t)(__p0); + __ret = __builtin_bit_cast(poly64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_p128(poly128_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_p64(poly64x2_t __p0) { poly16x8_t __ret; - __ret = 
(poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_f64(float64x2_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_f16(float16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_mf8(mfloat8x16_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { poly16x8_t __ret; - __ret = (poly16x8_t)(__p0); + __ret = __builtin_bit_cast(poly16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_p128(poly128_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_p64(poly64x2_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t 
vreinterpretq_u8_u64(uint64x2_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_f64(float64x2_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_f16(float16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_mf8(mfloat8x16_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { uint8x16_t __ret; - __ret = (uint8x16_t)(__p0); + __ret = __builtin_bit_cast(uint8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_p128(poly128_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_p64(poly64x2_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; 
} __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_f64(float64x2_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_f16(float16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_mf8(mfloat8x16_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { uint32x4_t __ret; - __ret = (uint32x4_t)(__p0); + __ret = __builtin_bit_cast(uint32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_p128(poly128_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_p64(poly64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_f64(float64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_f16(float16x8_t __p0) { uint64x2_t __ret; - __ret = 
(uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_mf8(mfloat8x16_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { uint64x2_t __ret; - __ret = (uint64x2_t)(__p0); + __ret = __builtin_bit_cast(uint64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_p128(poly128_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_p64(poly64x2_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_f64(float64x2_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t 
vreinterpretq_u16_mf8(mfloat8x16_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { uint16x8_t __ret; - __ret = (uint16x8_t)(__p0); + __ret = __builtin_bit_cast(uint16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_p128(poly128_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_p64(poly64x2_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_f64(float64x2_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_mf8(mfloat8x16_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { int8x16_t __ret; - __ret = (int8x16_t)(__p0); + __ret = __builtin_bit_cast(int8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_p8(poly8x16_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t 
vreinterpretq_f64_p128(poly128_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_p64(poly64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_p16(poly16x8_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_u8(uint8x16_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_u32(uint32x4_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_u64(uint64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_u16(uint16x8_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_s8(int8x16_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_f32(float32x4_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_f16(float16x8_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_s32(int32x4_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_s64(int64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_mf8(mfloat8x16_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { float64x2_t __ret; - __ret = (float64x2_t)(__p0); + __ret = __builtin_bit_cast(float64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_p128(poly128_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_p64(poly64x2_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { float32x4_t __ret; - __ret = 
(float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_f64(float64x2_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_mf8(mfloat8x16_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { float32x4_t __ret; - __ret = (float32x4_t)(__p0); + __ret = __builtin_bit_cast(float32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_p128(poly128_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_p64(poly64x2_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); 
return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_f64(float64x2_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_mf8(mfloat8x16_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { float16x8_t __ret; - __ret = (float16x8_t)(__p0); + __ret = __builtin_bit_cast(float16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_p128(poly128_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_p64(poly64x2_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { int32x4_t __ret; - __ret = 
(int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_f64(float64x2_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_mf8(mfloat8x16_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { int32x4_t __ret; - __ret = (int32x4_t)(__p0); + __ret = __builtin_bit_cast(int32x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_p128(poly128_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_p64(poly64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_f64(float64x2_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { int64x2_t __ret; - __ret = 
(int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_mf8(mfloat8x16_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { int64x2_t __ret; - __ret = (int64x2_t)(__p0); + __ret = __builtin_bit_cast(int64x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p8(poly8x16_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p128(poly128_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p64(poly64x2_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p16(poly16x8_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u8(uint8x16_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u32(uint32x4_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u64(uint64x2_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u16(uint16x8_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s8(int8x16_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_f64(float64x2_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_f32(float32x4_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_f16(float16x8_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s32(int32x4_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s64(int64x2_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = 
__builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s16(int16x8_t __p0) { mfloat8x16_t __ret; - __ret = (mfloat8x16_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x16_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_p128(poly128_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_p64(poly64x2_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_f64(float64x2_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_mf8(mfloat8x16_t __p0) { int16x8_t __ret; - __ret = (int16x8_t)(__p0); + __ret = __builtin_bit_cast(int16x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_p64(poly64x1_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = 
__builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_f64(float64x1_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_mf8(mfloat8x8_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { uint8x8_t __ret; - __ret = (uint8x8_t)(__p0); + __ret = __builtin_bit_cast(uint8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_p64(poly64x1_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = 
__builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_f64(float64x1_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_mf8(mfloat8x8_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { uint32x2_t __ret; - __ret = (uint32x2_t)(__p0); + __ret = __builtin_bit_cast(uint32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_p64(poly64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_f64(float64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { uint64x1_t __ret; 
- __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_mf8(mfloat8x8_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { uint64x1_t __ret; - __ret = (uint64x1_t)(__p0); + __ret = __builtin_bit_cast(uint64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_p64(poly64x1_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_f64(float64x1_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_mf8(mfloat8x8_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint16x4_t 
vreinterpret_u16_s16(int16x4_t __p0) { uint16x4_t __ret; - __ret = (uint16x4_t)(__p0); + __ret = __builtin_bit_cast(uint16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_p64(poly64x1_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_f64(float64x1_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_mf8(mfloat8x8_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { int8x8_t __ret; - __ret = (int8x8_t)(__p0); + __ret = __builtin_bit_cast(int8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_p8(poly8x8_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_p64(poly64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_p16(poly16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_u8(uint8x8_t __p0) { float64x1_t __ret; - __ret = 
(float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_u32(uint32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_u64(uint64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_u16(uint16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_s8(int8x8_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_f32(float32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_f16(float16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_s32(int32x2_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_s64(int64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_mf8(mfloat8x8_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { float64x1_t __ret; - __ret = (float64x1_t)(__p0); + __ret = __builtin_bit_cast(float64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_p64(poly64x1_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai 
__attribute__((target("neon"))) float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_f64(float64x1_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_mf8(mfloat8x8_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { float32x2_t __ret; - __ret = (float32x2_t)(__p0); + __ret = __builtin_bit_cast(float32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_p64(poly64x1_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_f64(float64x1_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { float16x4_t 
__ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_mf8(mfloat8x8_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { float16x4_t __ret; - __ret = (float16x4_t)(__p0); + __ret = __builtin_bit_cast(float16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_p64(poly64x1_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_f64(float64x1_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_mf8(mfloat8x8_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { int32x2_t __ret; - __ret = (int32x2_t)(__p0); + __ret = __builtin_bit_cast(int32x2_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { int64x1_t __ret; 
- __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_p64(poly64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_f64(float64x1_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_mf8(mfloat8x8_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; - __ret = (int64x1_t)(__p0); + __ret = __builtin_bit_cast(int64x1_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_p8(poly8x8_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_p64(poly64x1_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_p16(poly16x4_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u8(uint8x8_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u32(uint32x2_t __p0) { mfloat8x8_t 
__ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u64(uint64x1_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u16(uint16x4_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s8(int8x8_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_f64(float64x1_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_f32(float32x2_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_f16(float16x4_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s32(int32x2_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s64(int64x1_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s16(int16x4_t __p0) { mfloat8x8_t __ret; - __ret = (mfloat8x8_t)(__p0); + __ret = __builtin_bit_cast(mfloat8x8_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_p64(poly64x1_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t 
vreinterpret_s16_f64(float64x1_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_mf8(mfloat8x8_t __p0) { int16x4_t __ret; - __ret = (int16x4_t)(__p0); + __ret = __builtin_bit_cast(int16x4_t, __p0); return __ret; } __ai __attribute__((target("neon"))) uint64_t vrshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vrshld_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vrshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vrshld_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vrshld_s64(__p0, __p1)); return __ret; } #define vrshrd_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vrshrd_n_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vrshrd_n_u64(__s0, __p1)); \ __ret; \ }) #define vrshrd_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vrshrd_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vrshrd_n_s64(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u32(__p0_724, __p1_724, __p2_724) __extension__ ({ \ - uint16x8_t __ret_724; \ - uint16x4_t __s0_724 = __p0_724; \ - uint32x4_t __s1_724 = __p1_724; \ - __ret_724 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_724), (uint16x4_t)(vrshrn_n_u32(__s1_724, __p2_724)))); \ - __ret_724; \ +#define vrshrn_high_n_u32(__p0_740, __p1_740, __p2_740) __extension__ ({ \ + uint16x8_t __ret_740; \ + uint16x4_t __s0_740 = __p0_740; \ + uint32x4_t __s1_740 = __p1_740; \ + __ret_740 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_740), __builtin_bit_cast(uint16x4_t, vrshrn_n_u32(__s1_740, __p2_740)))); \ + __ret_740; \ }) #else -#define vrshrn_high_n_u32(__p0_725, __p1_725, __p2_725) __extension__ ({ \ - uint16x8_t __ret_725; \ - uint16x4_t __s0_725 = __p0_725; \ - uint32x4_t __s1_725 = __p1_725; \ - uint16x4_t __rev0_725; __rev0_725 = __builtin_shufflevector(__s0_725, __s0_725, 3, 2, 1, 0); \ - uint32x4_t __rev1_725; __rev1_725 = __builtin_shufflevector(__s1_725, __s1_725, 3, 2, 1, 0); \ - __ret_725 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_725), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_725, __p2_725)))); \ - __ret_725 = __builtin_shufflevector(__ret_725, __ret_725, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_725; \ +#define vrshrn_high_n_u32(__p0_741, __p1_741, __p2_741) __extension__ ({ \ + uint16x8_t __ret_741; \ 
+ uint16x4_t __s0_741 = __p0_741; \ + uint32x4_t __s1_741 = __p1_741; \ + uint16x4_t __rev0_741; __rev0_741 = __builtin_shufflevector(__s0_741, __s0_741, __lane_reverse_64_16); \ + uint32x4_t __rev1_741; __rev1_741 = __builtin_shufflevector(__s1_741, __s1_741, __lane_reverse_128_32); \ + __ret_741 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_741), __builtin_bit_cast(uint16x4_t, __noswap_vrshrn_n_u32(__rev1_741, __p2_741)))); \ + __ret_741 = __builtin_shufflevector(__ret_741, __ret_741, __lane_reverse_128_16); \ + __ret_741; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u64(__p0_726, __p1_726, __p2_726) __extension__ ({ \ - uint32x4_t __ret_726; \ - uint32x2_t __s0_726 = __p0_726; \ - uint64x2_t __s1_726 = __p1_726; \ - __ret_726 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_726), (uint32x2_t)(vrshrn_n_u64(__s1_726, __p2_726)))); \ - __ret_726; \ +#define vrshrn_high_n_u64(__p0_742, __p1_742, __p2_742) __extension__ ({ \ + uint32x4_t __ret_742; \ + uint32x2_t __s0_742 = __p0_742; \ + uint64x2_t __s1_742 = __p1_742; \ + __ret_742 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_742), __builtin_bit_cast(uint32x2_t, vrshrn_n_u64(__s1_742, __p2_742)))); \ + __ret_742; \ }) #else -#define vrshrn_high_n_u64(__p0_727, __p1_727, __p2_727) __extension__ ({ \ - uint32x4_t __ret_727; \ - uint32x2_t __s0_727 = __p0_727; \ - uint64x2_t __s1_727 = __p1_727; \ - uint32x2_t __rev0_727; __rev0_727 = __builtin_shufflevector(__s0_727, __s0_727, 1, 0); \ - uint64x2_t __rev1_727; __rev1_727 = __builtin_shufflevector(__s1_727, __s1_727, 1, 0); \ - __ret_727 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_727), (uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_727, __p2_727)))); \ - __ret_727 = __builtin_shufflevector(__ret_727, __ret_727, 3, 2, 1, 0); \ - __ret_727; \ +#define vrshrn_high_n_u64(__p0_743, __p1_743, __p2_743) __extension__ ({ \ + uint32x4_t __ret_743; \ + uint32x2_t __s0_743 = __p0_743; \ + uint64x2_t __s1_743 = __p1_743; \ + uint32x2_t __rev0_743; __rev0_743 = __builtin_shufflevector(__s0_743, __s0_743, __lane_reverse_64_32); \ + uint64x2_t __rev1_743; __rev1_743 = __builtin_shufflevector(__s1_743, __s1_743, __lane_reverse_128_64); \ + __ret_743 = __builtin_bit_cast(uint32x4_t, __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_743), __builtin_bit_cast(uint32x2_t, __noswap_vrshrn_n_u64(__rev1_743, __p2_743)))); \ + __ret_743 = __builtin_shufflevector(__ret_743, __ret_743, __lane_reverse_128_32); \ + __ret_743; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u16(__p0_728, __p1_728, __p2_728) __extension__ ({ \ - uint8x16_t __ret_728; \ - uint8x8_t __s0_728 = __p0_728; \ - uint16x8_t __s1_728 = __p1_728; \ - __ret_728 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_728), (uint8x8_t)(vrshrn_n_u16(__s1_728, __p2_728)))); \ - __ret_728; \ +#define vrshrn_high_n_u16(__p0_744, __p1_744, __p2_744) __extension__ ({ \ + uint8x16_t __ret_744; \ + uint8x8_t __s0_744 = __p0_744; \ + uint16x8_t __s1_744 = __p1_744; \ + __ret_744 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, __s0_744), __builtin_bit_cast(uint8x8_t, vrshrn_n_u16(__s1_744, __p2_744)))); \ + __ret_744; \ }) #else -#define vrshrn_high_n_u16(__p0_729, __p1_729, __p2_729) __extension__ ({ \ - uint8x16_t __ret_729; \ - uint8x8_t __s0_729 = __p0_729; \ - uint16x8_t __s1_729 = __p1_729; \ - uint8x8_t __rev0_729; __rev0_729 = __builtin_shufflevector(__s0_729, __s0_729, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t 
__rev1_729; __rev1_729 = __builtin_shufflevector(__s1_729, __s1_729, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_729 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_729), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_729, __p2_729)))); \ - __ret_729 = __builtin_shufflevector(__ret_729, __ret_729, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_729; \ +#define vrshrn_high_n_u16(__p0_745, __p1_745, __p2_745) __extension__ ({ \ + uint8x16_t __ret_745; \ + uint8x8_t __s0_745 = __p0_745; \ + uint16x8_t __s1_745 = __p1_745; \ + uint8x8_t __rev0_745; __rev0_745 = __builtin_shufflevector(__s0_745, __s0_745, __lane_reverse_64_8); \ + uint16x8_t __rev1_745; __rev1_745 = __builtin_shufflevector(__s1_745, __s1_745, __lane_reverse_128_16); \ + __ret_745 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_745), __builtin_bit_cast(uint8x8_t, __noswap_vrshrn_n_u16(__rev1_745, __p2_745)))); \ + __ret_745 = __builtin_shufflevector(__ret_745, __ret_745, __lane_reverse_128_8); \ + __ret_745; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s32(__p0_730, __p1_730, __p2_730) __extension__ ({ \ - int16x8_t __ret_730; \ - int16x4_t __s0_730 = __p0_730; \ - int32x4_t __s1_730 = __p1_730; \ - __ret_730 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_730), (int16x4_t)(vrshrn_n_s32(__s1_730, __p2_730)))); \ - __ret_730; \ +#define vrshrn_high_n_s32(__p0_746, __p1_746, __p2_746) __extension__ ({ \ + int16x8_t __ret_746; \ + int16x4_t __s0_746 = __p0_746; \ + int32x4_t __s1_746 = __p1_746; \ + __ret_746 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_746), __builtin_bit_cast(int16x4_t, vrshrn_n_s32(__s1_746, __p2_746)))); \ + __ret_746; \ }) #else -#define vrshrn_high_n_s32(__p0_731, __p1_731, __p2_731) __extension__ ({ \ - int16x8_t __ret_731; \ - int16x4_t __s0_731 = __p0_731; \ - int32x4_t __s1_731 = __p1_731; \ - int16x4_t __rev0_731; __rev0_731 = __builtin_shufflevector(__s0_731, __s0_731, 3, 2, 1, 0); \ - int32x4_t __rev1_731; __rev1_731 = __builtin_shufflevector(__s1_731, __s1_731, 3, 2, 1, 0); \ - __ret_731 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_731), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_731, __p2_731)))); \ - __ret_731 = __builtin_shufflevector(__ret_731, __ret_731, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_731; \ +#define vrshrn_high_n_s32(__p0_747, __p1_747, __p2_747) __extension__ ({ \ + int16x8_t __ret_747; \ + int16x4_t __s0_747 = __p0_747; \ + int32x4_t __s1_747 = __p1_747; \ + int16x4_t __rev0_747; __rev0_747 = __builtin_shufflevector(__s0_747, __s0_747, __lane_reverse_64_16); \ + int32x4_t __rev1_747; __rev1_747 = __builtin_shufflevector(__s1_747, __s1_747, __lane_reverse_128_32); \ + __ret_747 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_747), __builtin_bit_cast(int16x4_t, __noswap_vrshrn_n_s32(__rev1_747, __p2_747)))); \ + __ret_747 = __builtin_shufflevector(__ret_747, __ret_747, __lane_reverse_128_16); \ + __ret_747; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s64(__p0_732, __p1_732, __p2_732) __extension__ ({ \ - int32x4_t __ret_732; \ - int32x2_t __s0_732 = __p0_732; \ - int64x2_t __s1_732 = __p1_732; \ - __ret_732 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_732), (int32x2_t)(vrshrn_n_s64(__s1_732, __p2_732)))); \ - __ret_732; \ +#define vrshrn_high_n_s64(__p0_748, __p1_748, __p2_748) __extension__ ({ \ + int32x4_t __ret_748; \ + int32x2_t __s0_748 = __p0_748; \ + int64x2_t __s1_748 = __p1_748; \ + __ret_748 = 
__builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_748), __builtin_bit_cast(int32x2_t, vrshrn_n_s64(__s1_748, __p2_748)))); \ + __ret_748; \ }) #else -#define vrshrn_high_n_s64(__p0_733, __p1_733, __p2_733) __extension__ ({ \ - int32x4_t __ret_733; \ - int32x2_t __s0_733 = __p0_733; \ - int64x2_t __s1_733 = __p1_733; \ - int32x2_t __rev0_733; __rev0_733 = __builtin_shufflevector(__s0_733, __s0_733, 1, 0); \ - int64x2_t __rev1_733; __rev1_733 = __builtin_shufflevector(__s1_733, __s1_733, 1, 0); \ - __ret_733 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_733), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_733, __p2_733)))); \ - __ret_733 = __builtin_shufflevector(__ret_733, __ret_733, 3, 2, 1, 0); \ - __ret_733; \ +#define vrshrn_high_n_s64(__p0_749, __p1_749, __p2_749) __extension__ ({ \ + int32x4_t __ret_749; \ + int32x2_t __s0_749 = __p0_749; \ + int64x2_t __s1_749 = __p1_749; \ + int32x2_t __rev0_749; __rev0_749 = __builtin_shufflevector(__s0_749, __s0_749, __lane_reverse_64_32); \ + int64x2_t __rev1_749; __rev1_749 = __builtin_shufflevector(__s1_749, __s1_749, __lane_reverse_128_64); \ + __ret_749 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_749), __builtin_bit_cast(int32x2_t, __noswap_vrshrn_n_s64(__rev1_749, __p2_749)))); \ + __ret_749 = __builtin_shufflevector(__ret_749, __ret_749, __lane_reverse_128_32); \ + __ret_749; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s16(__p0_734, __p1_734, __p2_734) __extension__ ({ \ - int8x16_t __ret_734; \ - int8x8_t __s0_734 = __p0_734; \ - int16x8_t __s1_734 = __p1_734; \ - __ret_734 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_734), (int8x8_t)(vrshrn_n_s16(__s1_734, __p2_734)))); \ - __ret_734; \ +#define vrshrn_high_n_s16(__p0_750, __p1_750, __p2_750) __extension__ ({ \ + int8x16_t __ret_750; \ + int8x8_t __s0_750 = __p0_750; \ + int16x8_t __s1_750 = __p1_750; \ + __ret_750 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_750), __builtin_bit_cast(int8x8_t, vrshrn_n_s16(__s1_750, __p2_750)))); \ + __ret_750; \ }) #else -#define vrshrn_high_n_s16(__p0_735, __p1_735, __p2_735) __extension__ ({ \ - int8x16_t __ret_735; \ - int8x8_t __s0_735 = __p0_735; \ - int16x8_t __s1_735 = __p1_735; \ - int8x8_t __rev0_735; __rev0_735 = __builtin_shufflevector(__s0_735, __s0_735, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_735; __rev1_735 = __builtin_shufflevector(__s1_735, __s1_735, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_735 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_735), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_735, __p2_735)))); \ - __ret_735 = __builtin_shufflevector(__ret_735, __ret_735, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_735; \ +#define vrshrn_high_n_s16(__p0_751, __p1_751, __p2_751) __extension__ ({ \ + int8x16_t __ret_751; \ + int8x8_t __s0_751 = __p0_751; \ + int16x8_t __s1_751 = __p1_751; \ + int8x8_t __rev0_751; __rev0_751 = __builtin_shufflevector(__s0_751, __s0_751, __lane_reverse_64_8); \ + int16x8_t __rev1_751; __rev1_751 = __builtin_shufflevector(__s1_751, __s1_751, __lane_reverse_128_16); \ + __ret_751 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_751), __builtin_bit_cast(int8x8_t, __noswap_vrshrn_n_s16(__rev1_751, __p2_751)))); \ + __ret_751 = __builtin_shufflevector(__ret_751, __ret_751, __lane_reverse_128_8); \ + __ret_751; \ }) #endif #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrsqrteq_f64(float64x2_t __p0) { 
float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrsqrteq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrsqrteq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrsqrteq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrsqrteq_v(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrsqrte_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrsqrte_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } __ai __attribute__((target("neon"))) float64_t vrsqrted_f64(float64_t __p0) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrted_f64(__p0); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vrsqrted_f64(__p0)); return __ret; } __ai __attribute__((target("neon"))) float32_t vrsqrtes_f32(float32_t __p0) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtes_f32(__p0); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vrsqrtes_f32(__p0)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrsqrtsq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vrsqrtsq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrsqrtsq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vrsqrts_f64(float64x1_t __p0, float64x1_t __p1) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrsqrts_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 10)); return __ret; } __ai __attribute__((target("neon"))) float64_t vrsqrtsd_f64(float64_t __p0, float64_t __p1) { float64_t __ret; - __ret = (float64_t) __builtin_neon_vrsqrtsd_f64(__p0, __p1); + __ret = __builtin_bit_cast(float64_t, __builtin_neon_vrsqrtsd_f64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) float32_t 
vrsqrtss_f32(float32_t __p0, float32_t __p1) { float32_t __ret; - __ret = (float32_t) __builtin_neon_vrsqrtss_f32(__p0, __p1); + __ret = __builtin_bit_cast(float32_t, __builtin_neon_vrsqrtss_f32(__p0, __p1)); return __ret; } #define vrsrad_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ - __ret = (uint64_t) __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vrsrad_n_u64(__s0, __s1, __p2)); \ __ret; \ }) #define vrsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ - __ret = (int64_t) __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vrsrad_n_s64(__s0, __s1, __p2)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -62268,11 +64242,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vrsubhn_high_u32(uint16x4_t __p0 #else __ai __attribute__((target("neon"))) uint16x8_t vrsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vcombine_u16(__rev0, __noswap_vrsubhn_u32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -62286,11 +64260,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vrsubhn_high_u64(uint32x2_t __p0 #else __ai __attribute__((target("neon"))) uint32x4_t vrsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vcombine_u32(__rev0, __noswap_vrsubhn_u64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -62304,11 +64278,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, #else __ai __attribute__((target("neon"))) uint8x16_t vrsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = 
__builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_u8(__rev0, __noswap_vrsubhn_u16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -62322,11 +64296,11 @@ __ai __attribute__((target("neon"))) int16x8_t vrsubhn_high_s32(int16x4_t __p0, #else __ai __attribute__((target("neon"))) int16x8_t vrsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vcombine_s16(__rev0, __noswap_vrsubhn_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -62340,11 +64314,11 @@ __ai __attribute__((target("neon"))) int32x4_t vrsubhn_high_s64(int32x2_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vrsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vcombine_s32(__rev0, __noswap_vrsubhn_s64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -62358,11 +64332,11 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i #else __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_s8(__rev0, __noswap_vrsubhn_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -62371,7 +64345,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i poly64x1_t __ret; \ poly64_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ - __ret = (poly64x1_t) 
__builtin_neon_vset_lane_i64(__s0, (poly64x1_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vset_lane_i64(__s0, __s1, __p2)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -62379,7 +64353,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -62387,16 +64361,16 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vsetq_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - __ret = (poly64x2_t) __builtin_neon_vsetq_lane_i64(__s0, (poly64x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsetq_lane_i64(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -62406,7 +64380,7 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i float64x2_t __ret; \ float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vsetq_lane_f64(__s0, __s1, __p2)); \ __ret; \ }) #else @@ -62414,16 +64388,16 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i float64x2_t __ret; \ float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__rev1, __p2); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vsetq_lane_f64(__s0, __rev1, __p2)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #define __noswap_vsetq_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64_t __s0 = __p0; \ float64x2_t __s1 = __p1; \ - __ret = (float64x2_t) __builtin_neon_vsetq_lane_f64(__s0, (float64x2_t)__s1, __p2); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vsetq_lane_f64(__s0, __s1, __p2)); \ __ret; \ }) #endif @@ -62432,274 +64406,274 @@ __ai __attribute__((target("neon"))) int8x16_t vrsubhn_high_s16(int8x8_t __p0, i float64x1_t __ret; \ float64_t __s0 = __p0; \ float64x1_t __s1 = __p1; \ - __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (float64x1_t)__s1, __p2); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vset_lane_f64(__s0, __s1, __p2)); \ __ret; \ }) __ai __attribute__((target("neon"))) uint64_t vshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, 
__builtin_neon_vshld_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vshld_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vshld_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vshld_s64(__p0, __p1)); return __ret; } #define vshld_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vshld_n_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vshld_n_u64(__s0, __p1)); \ __ret; \ }) #define vshld_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vshld_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vshld_n_s64(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u8(__p0_736, __p1_736) __extension__ ({ \ - uint16x8_t __ret_736; \ - uint8x16_t __s0_736 = __p0_736; \ - __ret_736 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_736), __p1_736)); \ - __ret_736; \ +#define vshll_high_n_u8(__p0_752, __p1_752) __extension__ ({ \ + uint16x8_t __ret_752; \ + uint8x16_t __s0_752 = __p0_752; \ + __ret_752 = __builtin_bit_cast(uint16x8_t, vshll_n_u8(vget_high_u8(__s0_752), __p1_752)); \ + __ret_752; \ }) #else -#define vshll_high_n_u8(__p0_737, __p1_737) __extension__ ({ \ - uint16x8_t __ret_737; \ - uint8x16_t __s0_737 = __p0_737; \ - uint8x16_t __rev0_737; __rev0_737 = __builtin_shufflevector(__s0_737, __s0_737, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_737 = (uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_737), __p1_737)); \ - __ret_737 = __builtin_shufflevector(__ret_737, __ret_737, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_737; \ +#define vshll_high_n_u8(__p0_753, __p1_753) __extension__ ({ \ + uint16x8_t __ret_753; \ + uint8x16_t __s0_753 = __p0_753; \ + uint8x16_t __rev0_753; __rev0_753 = __builtin_shufflevector(__s0_753, __s0_753, __lane_reverse_128_8); \ + __ret_753 = __builtin_bit_cast(uint16x8_t, __noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_753), __p1_753)); \ + __ret_753 = __builtin_shufflevector(__ret_753, __ret_753, __lane_reverse_128_16); \ + __ret_753; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u32(__p0_738, __p1_738) __extension__ ({ \ - uint64x2_t __ret_738; \ - uint32x4_t __s0_738 = __p0_738; \ - __ret_738 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_738), __p1_738)); \ - __ret_738; \ +#define vshll_high_n_u32(__p0_754, __p1_754) __extension__ ({ \ + uint64x2_t __ret_754; \ + uint32x4_t __s0_754 = __p0_754; \ + __ret_754 = __builtin_bit_cast(uint64x2_t, vshll_n_u32(vget_high_u32(__s0_754), __p1_754)); \ + __ret_754; \ }) #else -#define vshll_high_n_u32(__p0_739, __p1_739) __extension__ ({ \ - uint64x2_t __ret_739; \ - uint32x4_t __s0_739 = __p0_739; \ - uint32x4_t __rev0_739; __rev0_739 = __builtin_shufflevector(__s0_739, __s0_739, 3, 2, 1, 0); \ - __ret_739 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_739), __p1_739)); \ - __ret_739 = __builtin_shufflevector(__ret_739, __ret_739, 1, 0); \ - __ret_739; \ +#define vshll_high_n_u32(__p0_755, __p1_755) __extension__ ({ \ + uint64x2_t __ret_755; \ + uint32x4_t __s0_755 = __p0_755; \ + uint32x4_t __rev0_755; __rev0_755 = __builtin_shufflevector(__s0_755, __s0_755, __lane_reverse_128_32); \ + __ret_755 = __builtin_bit_cast(uint64x2_t, __noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_755), __p1_755)); \ + __ret_755 = __builtin_shufflevector(__ret_755, __ret_755, __lane_reverse_128_64); \ + 
__ret_755; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u16(__p0_740, __p1_740) __extension__ ({ \ - uint32x4_t __ret_740; \ - uint16x8_t __s0_740 = __p0_740; \ - __ret_740 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_740), __p1_740)); \ - __ret_740; \ +#define vshll_high_n_u16(__p0_756, __p1_756) __extension__ ({ \ + uint32x4_t __ret_756; \ + uint16x8_t __s0_756 = __p0_756; \ + __ret_756 = __builtin_bit_cast(uint32x4_t, vshll_n_u16(vget_high_u16(__s0_756), __p1_756)); \ + __ret_756; \ }) #else -#define vshll_high_n_u16(__p0_741, __p1_741) __extension__ ({ \ - uint32x4_t __ret_741; \ - uint16x8_t __s0_741 = __p0_741; \ - uint16x8_t __rev0_741; __rev0_741 = __builtin_shufflevector(__s0_741, __s0_741, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_741 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_741), __p1_741)); \ - __ret_741 = __builtin_shufflevector(__ret_741, __ret_741, 3, 2, 1, 0); \ - __ret_741; \ +#define vshll_high_n_u16(__p0_757, __p1_757) __extension__ ({ \ + uint32x4_t __ret_757; \ + uint16x8_t __s0_757 = __p0_757; \ + uint16x8_t __rev0_757; __rev0_757 = __builtin_shufflevector(__s0_757, __s0_757, __lane_reverse_128_16); \ + __ret_757 = __builtin_bit_cast(uint32x4_t, __noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_757), __p1_757)); \ + __ret_757 = __builtin_shufflevector(__ret_757, __ret_757, __lane_reverse_128_32); \ + __ret_757; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s8(__p0_742, __p1_742) __extension__ ({ \ - int16x8_t __ret_742; \ - int8x16_t __s0_742 = __p0_742; \ - __ret_742 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_742), __p1_742)); \ - __ret_742; \ +#define vshll_high_n_s8(__p0_758, __p1_758) __extension__ ({ \ + int16x8_t __ret_758; \ + int8x16_t __s0_758 = __p0_758; \ + __ret_758 = __builtin_bit_cast(int16x8_t, vshll_n_s8(vget_high_s8(__s0_758), __p1_758)); \ + __ret_758; \ }) #else -#define vshll_high_n_s8(__p0_743, __p1_743) __extension__ ({ \ - int16x8_t __ret_743; \ - int8x16_t __s0_743 = __p0_743; \ - int8x16_t __rev0_743; __rev0_743 = __builtin_shufflevector(__s0_743, __s0_743, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_743 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_743), __p1_743)); \ - __ret_743 = __builtin_shufflevector(__ret_743, __ret_743, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_743; \ +#define vshll_high_n_s8(__p0_759, __p1_759) __extension__ ({ \ + int16x8_t __ret_759; \ + int8x16_t __s0_759 = __p0_759; \ + int8x16_t __rev0_759; __rev0_759 = __builtin_shufflevector(__s0_759, __s0_759, __lane_reverse_128_8); \ + __ret_759 = __builtin_bit_cast(int16x8_t, __noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_759), __p1_759)); \ + __ret_759 = __builtin_shufflevector(__ret_759, __ret_759, __lane_reverse_128_16); \ + __ret_759; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s32(__p0_744, __p1_744) __extension__ ({ \ - int64x2_t __ret_744; \ - int32x4_t __s0_744 = __p0_744; \ - __ret_744 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_744), __p1_744)); \ - __ret_744; \ +#define vshll_high_n_s32(__p0_760, __p1_760) __extension__ ({ \ + int64x2_t __ret_760; \ + int32x4_t __s0_760 = __p0_760; \ + __ret_760 = __builtin_bit_cast(int64x2_t, vshll_n_s32(vget_high_s32(__s0_760), __p1_760)); \ + __ret_760; \ }) #else -#define vshll_high_n_s32(__p0_745, __p1_745) __extension__ ({ \ - int64x2_t __ret_745; \ - int32x4_t __s0_745 = __p0_745; \ - int32x4_t __rev0_745; __rev0_745 = __builtin_shufflevector(__s0_745, __s0_745, 3, 2, 1, 0); \ - __ret_745 = 
(int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_745), __p1_745)); \ - __ret_745 = __builtin_shufflevector(__ret_745, __ret_745, 1, 0); \ - __ret_745; \ +#define vshll_high_n_s32(__p0_761, __p1_761) __extension__ ({ \ + int64x2_t __ret_761; \ + int32x4_t __s0_761 = __p0_761; \ + int32x4_t __rev0_761; __rev0_761 = __builtin_shufflevector(__s0_761, __s0_761, __lane_reverse_128_32); \ + __ret_761 = __builtin_bit_cast(int64x2_t, __noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_761), __p1_761)); \ + __ret_761 = __builtin_shufflevector(__ret_761, __ret_761, __lane_reverse_128_64); \ + __ret_761; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s16(__p0_746, __p1_746) __extension__ ({ \ - int32x4_t __ret_746; \ - int16x8_t __s0_746 = __p0_746; \ - __ret_746 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_746), __p1_746)); \ - __ret_746; \ +#define vshll_high_n_s16(__p0_762, __p1_762) __extension__ ({ \ + int32x4_t __ret_762; \ + int16x8_t __s0_762 = __p0_762; \ + __ret_762 = __builtin_bit_cast(int32x4_t, vshll_n_s16(vget_high_s16(__s0_762), __p1_762)); \ + __ret_762; \ }) #else -#define vshll_high_n_s16(__p0_747, __p1_747) __extension__ ({ \ - int32x4_t __ret_747; \ - int16x8_t __s0_747 = __p0_747; \ - int16x8_t __rev0_747; __rev0_747 = __builtin_shufflevector(__s0_747, __s0_747, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_747 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_747), __p1_747)); \ - __ret_747 = __builtin_shufflevector(__ret_747, __ret_747, 3, 2, 1, 0); \ - __ret_747; \ +#define vshll_high_n_s16(__p0_763, __p1_763) __extension__ ({ \ + int32x4_t __ret_763; \ + int16x8_t __s0_763 = __p0_763; \ + int16x8_t __rev0_763; __rev0_763 = __builtin_shufflevector(__s0_763, __s0_763, __lane_reverse_128_16); \ + __ret_763 = __builtin_bit_cast(int32x4_t, __noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_763), __p1_763)); \ + __ret_763 = __builtin_shufflevector(__ret_763, __ret_763, __lane_reverse_128_32); \ + __ret_763; \ }) #endif #define vshrd_n_u64(__p0, __p1) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ - __ret = (uint64_t) __builtin_neon_vshrd_n_u64(__s0, __p1); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vshrd_n_u64(__s0, __p1)); \ __ret; \ }) #define vshrd_n_s64(__p0, __p1) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ - __ret = (int64_t) __builtin_neon_vshrd_n_s64(__s0, __p1); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vshrd_n_s64(__s0, __p1)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u32(__p0_748, __p1_748, __p2_748) __extension__ ({ \ - uint16x8_t __ret_748; \ - uint16x4_t __s0_748 = __p0_748; \ - uint32x4_t __s1_748 = __p1_748; \ - __ret_748 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_748), (uint16x4_t)(vshrn_n_u32(__s1_748, __p2_748)))); \ - __ret_748; \ +#define vshrn_high_n_u32(__p0_764, __p1_764, __p2_764) __extension__ ({ \ + uint16x8_t __ret_764; \ + uint16x4_t __s0_764 = __p0_764; \ + uint32x4_t __s1_764 = __p1_764; \ + __ret_764 = __builtin_bit_cast(uint16x8_t, vcombine_u16(__builtin_bit_cast(uint16x4_t, __s0_764), __builtin_bit_cast(uint16x4_t, vshrn_n_u32(__s1_764, __p2_764)))); \ + __ret_764; \ }) #else -#define vshrn_high_n_u32(__p0_749, __p1_749, __p2_749) __extension__ ({ \ - uint16x8_t __ret_749; \ - uint16x4_t __s0_749 = __p0_749; \ - uint32x4_t __s1_749 = __p1_749; \ - uint16x4_t __rev0_749; __rev0_749 = __builtin_shufflevector(__s0_749, __s0_749, 3, 2, 1, 0); \ - uint32x4_t __rev1_749; __rev1_749 = __builtin_shufflevector(__s1_749, __s1_749, 3, 2, 1, 0); \ - 
__ret_749 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_749), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_749, __p2_749)))); \ - __ret_749 = __builtin_shufflevector(__ret_749, __ret_749, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_749; \ +#define vshrn_high_n_u32(__p0_765, __p1_765, __p2_765) __extension__ ({ \ + uint16x8_t __ret_765; \ + uint16x4_t __s0_765 = __p0_765; \ + uint32x4_t __s1_765 = __p1_765; \ + uint16x4_t __rev0_765; __rev0_765 = __builtin_shufflevector(__s0_765, __s0_765, __lane_reverse_64_16); \ + uint32x4_t __rev1_765; __rev1_765 = __builtin_shufflevector(__s1_765, __s1_765, __lane_reverse_128_32); \ + __ret_765 = __builtin_bit_cast(uint16x8_t, __noswap_vcombine_u16(__builtin_bit_cast(uint16x4_t, __rev0_765), __builtin_bit_cast(uint16x4_t, __noswap_vshrn_n_u32(__rev1_765, __p2_765)))); \ + __ret_765 = __builtin_shufflevector(__ret_765, __ret_765, __lane_reverse_128_16); \ + __ret_765; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u64(__p0_750, __p1_750, __p2_750) __extension__ ({ \ - uint32x4_t __ret_750; \ - uint32x2_t __s0_750 = __p0_750; \ - uint64x2_t __s1_750 = __p1_750; \ - __ret_750 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_750), (uint32x2_t)(vshrn_n_u64(__s1_750, __p2_750)))); \ - __ret_750; \ +#define vshrn_high_n_u64(__p0_766, __p1_766, __p2_766) __extension__ ({ \ + uint32x4_t __ret_766; \ + uint32x2_t __s0_766 = __p0_766; \ + uint64x2_t __s1_766 = __p1_766; \ + __ret_766 = __builtin_bit_cast(uint32x4_t, vcombine_u32(__builtin_bit_cast(uint32x2_t, __s0_766), __builtin_bit_cast(uint32x2_t, vshrn_n_u64(__s1_766, __p2_766)))); \ + __ret_766; \ }) #else -#define vshrn_high_n_u64(__p0_751, __p1_751, __p2_751) __extension__ ({ \ - uint32x4_t __ret_751; \ - uint32x2_t __s0_751 = __p0_751; \ - uint64x2_t __s1_751 = __p1_751; \ - uint32x2_t __rev0_751; __rev0_751 = __builtin_shufflevector(__s0_751, __s0_751, 1, 0); \ - uint64x2_t __rev1_751; __rev1_751 = __builtin_shufflevector(__s1_751, __s1_751, 1, 0); \ - __ret_751 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_751), (uint32x2_t)(__noswap_vshrn_n_u64(__rev1_751, __p2_751)))); \ - __ret_751 = __builtin_shufflevector(__ret_751, __ret_751, 3, 2, 1, 0); \ - __ret_751; \ +#define vshrn_high_n_u64(__p0_767, __p1_767, __p2_767) __extension__ ({ \ + uint32x4_t __ret_767; \ + uint32x2_t __s0_767 = __p0_767; \ + uint64x2_t __s1_767 = __p1_767; \ + uint32x2_t __rev0_767; __rev0_767 = __builtin_shufflevector(__s0_767, __s0_767, __lane_reverse_64_32); \ + uint64x2_t __rev1_767; __rev1_767 = __builtin_shufflevector(__s1_767, __s1_767, __lane_reverse_128_64); \ + __ret_767 = __builtin_bit_cast(uint32x4_t, __noswap_vcombine_u32(__builtin_bit_cast(uint32x2_t, __rev0_767), __builtin_bit_cast(uint32x2_t, __noswap_vshrn_n_u64(__rev1_767, __p2_767)))); \ + __ret_767 = __builtin_shufflevector(__ret_767, __ret_767, __lane_reverse_128_32); \ + __ret_767; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u16(__p0_752, __p1_752, __p2_752) __extension__ ({ \ - uint8x16_t __ret_752; \ - uint8x8_t __s0_752 = __p0_752; \ - uint16x8_t __s1_752 = __p1_752; \ - __ret_752 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_752), (uint8x8_t)(vshrn_n_u16(__s1_752, __p2_752)))); \ - __ret_752; \ +#define vshrn_high_n_u16(__p0_768, __p1_768, __p2_768) __extension__ ({ \ + uint8x16_t __ret_768; \ + uint8x8_t __s0_768 = __p0_768; \ + uint16x8_t __s1_768 = __p1_768; \ + __ret_768 = __builtin_bit_cast(uint8x16_t, vcombine_u8(__builtin_bit_cast(uint8x8_t, __s0_768), __builtin_bit_cast(uint8x8_t, vshrn_n_u16(__s1_768, __p2_768)))); \ 
+ __ret_768; \ }) #else -#define vshrn_high_n_u16(__p0_753, __p1_753, __p2_753) __extension__ ({ \ - uint8x16_t __ret_753; \ - uint8x8_t __s0_753 = __p0_753; \ - uint16x8_t __s1_753 = __p1_753; \ - uint8x8_t __rev0_753; __rev0_753 = __builtin_shufflevector(__s0_753, __s0_753, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_753; __rev1_753 = __builtin_shufflevector(__s1_753, __s1_753, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_753 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_753), (uint8x8_t)(__noswap_vshrn_n_u16(__rev1_753, __p2_753)))); \ - __ret_753 = __builtin_shufflevector(__ret_753, __ret_753, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_753; \ +#define vshrn_high_n_u16(__p0_769, __p1_769, __p2_769) __extension__ ({ \ + uint8x16_t __ret_769; \ + uint8x8_t __s0_769 = __p0_769; \ + uint16x8_t __s1_769 = __p1_769; \ + uint8x8_t __rev0_769; __rev0_769 = __builtin_shufflevector(__s0_769, __s0_769, __lane_reverse_64_8); \ + uint16x8_t __rev1_769; __rev1_769 = __builtin_shufflevector(__s1_769, __s1_769, __lane_reverse_128_16); \ + __ret_769 = __builtin_bit_cast(uint8x16_t, __noswap_vcombine_u8(__builtin_bit_cast(uint8x8_t, __rev0_769), __builtin_bit_cast(uint8x8_t, __noswap_vshrn_n_u16(__rev1_769, __p2_769)))); \ + __ret_769 = __builtin_shufflevector(__ret_769, __ret_769, __lane_reverse_128_8); \ + __ret_769; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s32(__p0_754, __p1_754, __p2_754) __extension__ ({ \ - int16x8_t __ret_754; \ - int16x4_t __s0_754 = __p0_754; \ - int32x4_t __s1_754 = __p1_754; \ - __ret_754 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_754), (int16x4_t)(vshrn_n_s32(__s1_754, __p2_754)))); \ - __ret_754; \ +#define vshrn_high_n_s32(__p0_770, __p1_770, __p2_770) __extension__ ({ \ + int16x8_t __ret_770; \ + int16x4_t __s0_770 = __p0_770; \ + int32x4_t __s1_770 = __p1_770; \ + __ret_770 = __builtin_bit_cast(int16x8_t, vcombine_s16(__builtin_bit_cast(int16x4_t, __s0_770), __builtin_bit_cast(int16x4_t, vshrn_n_s32(__s1_770, __p2_770)))); \ + __ret_770; \ }) #else -#define vshrn_high_n_s32(__p0_755, __p1_755, __p2_755) __extension__ ({ \ - int16x8_t __ret_755; \ - int16x4_t __s0_755 = __p0_755; \ - int32x4_t __s1_755 = __p1_755; \ - int16x4_t __rev0_755; __rev0_755 = __builtin_shufflevector(__s0_755, __s0_755, 3, 2, 1, 0); \ - int32x4_t __rev1_755; __rev1_755 = __builtin_shufflevector(__s1_755, __s1_755, 3, 2, 1, 0); \ - __ret_755 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_755), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_755, __p2_755)))); \ - __ret_755 = __builtin_shufflevector(__ret_755, __ret_755, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_755; \ +#define vshrn_high_n_s32(__p0_771, __p1_771, __p2_771) __extension__ ({ \ + int16x8_t __ret_771; \ + int16x4_t __s0_771 = __p0_771; \ + int32x4_t __s1_771 = __p1_771; \ + int16x4_t __rev0_771; __rev0_771 = __builtin_shufflevector(__s0_771, __s0_771, __lane_reverse_64_16); \ + int32x4_t __rev1_771; __rev1_771 = __builtin_shufflevector(__s1_771, __s1_771, __lane_reverse_128_32); \ + __ret_771 = __builtin_bit_cast(int16x8_t, __noswap_vcombine_s16(__builtin_bit_cast(int16x4_t, __rev0_771), __builtin_bit_cast(int16x4_t, __noswap_vshrn_n_s32(__rev1_771, __p2_771)))); \ + __ret_771 = __builtin_shufflevector(__ret_771, __ret_771, __lane_reverse_128_16); \ + __ret_771; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s64(__p0_756, __p1_756, __p2_756) __extension__ ({ \ - int32x4_t __ret_756; \ - int32x2_t __s0_756 = __p0_756; \ - int64x2_t __s1_756 = __p1_756; \ - __ret_756 = 
(int32x4_t)(vcombine_s32((int32x2_t)(__s0_756), (int32x2_t)(vshrn_n_s64(__s1_756, __p2_756)))); \ - __ret_756; \ +#define vshrn_high_n_s64(__p0_772, __p1_772, __p2_772) __extension__ ({ \ + int32x4_t __ret_772; \ + int32x2_t __s0_772 = __p0_772; \ + int64x2_t __s1_772 = __p1_772; \ + __ret_772 = __builtin_bit_cast(int32x4_t, vcombine_s32(__builtin_bit_cast(int32x2_t, __s0_772), __builtin_bit_cast(int32x2_t, vshrn_n_s64(__s1_772, __p2_772)))); \ + __ret_772; \ }) #else -#define vshrn_high_n_s64(__p0_757, __p1_757, __p2_757) __extension__ ({ \ - int32x4_t __ret_757; \ - int32x2_t __s0_757 = __p0_757; \ - int64x2_t __s1_757 = __p1_757; \ - int32x2_t __rev0_757; __rev0_757 = __builtin_shufflevector(__s0_757, __s0_757, 1, 0); \ - int64x2_t __rev1_757; __rev1_757 = __builtin_shufflevector(__s1_757, __s1_757, 1, 0); \ - __ret_757 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_757), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_757, __p2_757)))); \ - __ret_757 = __builtin_shufflevector(__ret_757, __ret_757, 3, 2, 1, 0); \ - __ret_757; \ +#define vshrn_high_n_s64(__p0_773, __p1_773, __p2_773) __extension__ ({ \ + int32x4_t __ret_773; \ + int32x2_t __s0_773 = __p0_773; \ + int64x2_t __s1_773 = __p1_773; \ + int32x2_t __rev0_773; __rev0_773 = __builtin_shufflevector(__s0_773, __s0_773, __lane_reverse_64_32); \ + int64x2_t __rev1_773; __rev1_773 = __builtin_shufflevector(__s1_773, __s1_773, __lane_reverse_128_64); \ + __ret_773 = __builtin_bit_cast(int32x4_t, __noswap_vcombine_s32(__builtin_bit_cast(int32x2_t, __rev0_773), __builtin_bit_cast(int32x2_t, __noswap_vshrn_n_s64(__rev1_773, __p2_773)))); \ + __ret_773 = __builtin_shufflevector(__ret_773, __ret_773, __lane_reverse_128_32); \ + __ret_773; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s16(__p0_758, __p1_758, __p2_758) __extension__ ({ \ - int8x16_t __ret_758; \ - int8x8_t __s0_758 = __p0_758; \ - int16x8_t __s1_758 = __p1_758; \ - __ret_758 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_758), (int8x8_t)(vshrn_n_s16(__s1_758, __p2_758)))); \ - __ret_758; \ +#define vshrn_high_n_s16(__p0_774, __p1_774, __p2_774) __extension__ ({ \ + int8x16_t __ret_774; \ + int8x8_t __s0_774 = __p0_774; \ + int16x8_t __s1_774 = __p1_774; \ + __ret_774 = __builtin_bit_cast(int8x16_t, vcombine_s8(__builtin_bit_cast(int8x8_t, __s0_774), __builtin_bit_cast(int8x8_t, vshrn_n_s16(__s1_774, __p2_774)))); \ + __ret_774; \ }) #else -#define vshrn_high_n_s16(__p0_759, __p1_759, __p2_759) __extension__ ({ \ - int8x16_t __ret_759; \ - int8x8_t __s0_759 = __p0_759; \ - int16x8_t __s1_759 = __p1_759; \ - int8x8_t __rev0_759; __rev0_759 = __builtin_shufflevector(__s0_759, __s0_759, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_759; __rev1_759 = __builtin_shufflevector(__s1_759, __s1_759, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_759 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_759), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_759, __p2_759)))); \ - __ret_759 = __builtin_shufflevector(__ret_759, __ret_759, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_759; \ +#define vshrn_high_n_s16(__p0_775, __p1_775, __p2_775) __extension__ ({ \ + int8x16_t __ret_775; \ + int8x8_t __s0_775 = __p0_775; \ + int16x8_t __s1_775 = __p1_775; \ + int8x8_t __rev0_775; __rev0_775 = __builtin_shufflevector(__s0_775, __s0_775, __lane_reverse_64_8); \ + int16x8_t __rev1_775; __rev1_775 = __builtin_shufflevector(__s1_775, __s1_775, __lane_reverse_128_16); \ + __ret_775 = __builtin_bit_cast(int8x16_t, __noswap_vcombine_s8(__builtin_bit_cast(int8x8_t, __rev0_775), 
__builtin_bit_cast(int8x8_t, __noswap_vshrn_n_s16(__rev1_775, __p2_775)))); \ + __ret_775 = __builtin_shufflevector(__ret_775, __ret_775, __lane_reverse_128_8); \ + __ret_775; \ }) #endif @@ -62707,21 +64681,21 @@ __ai __attribute__((target("neon"))) int64_t vshld_s64(int64_t __p0, int64_t __p uint64_t __ret; \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ - __ret = (uint64_t) __builtin_neon_vslid_n_u64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vslid_n_u64(__s0, __s1, __p2)); \ __ret; \ }) #define vslid_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ - __ret = (int64_t) __builtin_neon_vslid_n_s64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vslid_n_s64(__s0, __s1, __p2)); \ __ret; \ }) #define vsli_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ - __ret = (poly64x1_t) __builtin_neon_vsli_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vsli_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -62729,7 +64703,7 @@ __ai __attribute__((target("neon"))) int64_t vshld_s64(int64_t __p0, int64_t __p poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 38)); \ __ret; \ }) #else @@ -62737,47 +64711,47 @@ __ai __attribute__((target("neon"))) int64_t vshld_s64(int64_t __p0, int64_t __p poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (poly64x2_t) __builtin_neon_vsliq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsliq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif __ai __attribute__((target("neon"))) uint8_t vsqaddb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; - __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); + __ret = __builtin_bit_cast(uint8_t, __builtin_neon_vsqaddb_u8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint32_t vsqadds_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; - __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); + __ret = __builtin_bit_cast(uint32_t, __builtin_neon_vsqadds_u32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vsqaddd_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vsqaddd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint16_t vsqaddh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; - __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); + __ret = 
__builtin_bit_cast(uint16_t, __builtin_neon_vsqaddh_u16(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 48)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -62785,16 +64759,16 @@ __ai __attribute__((target("neon"))) uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vsqaddq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vsqaddq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -62802,16 +64776,16 @@ __ai __attribute__((target("neon"))) uint32x4_t vsqaddq_u32(uint32x4_t __p0, int #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vsqaddq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vsqaddq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - 
__ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -62819,16 +64793,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vsqaddq_u64(uint64x2_t __p0, int #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vsqaddq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 49)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vsqaddq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vsqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -62836,16 +64810,16 @@ __ai __attribute__((target("neon"))) uint16x8_t vsqaddq_u16(uint16x8_t __p0, int #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x8_t vsqadd_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 16)); return __ret; } #else __ai __attribute__((target("neon"))) uint8x8_t vsqadd_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint8x8_t, __builtin_neon_vsqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 16)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -62853,38 +64827,38 @@ __ai __attribute__((target("neon"))) uint8x8_t vsqadd_u8(uint8x8_t __p0, int8x8_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x2_t vsqadd_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - __ret = 
(uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 18)); return __ret; } #else __ai __attribute__((target("neon"))) uint32x2_t vsqadd_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint32x2_t, __builtin_neon_vsqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 18)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vsqadd_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vsqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x4_t vsqadd_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 17)); return __ret; } #else __ai __attribute__((target("neon"))) uint16x4_t vsqadd_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint16x4_t, __builtin_neon_vsqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 17)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -62892,15 +64866,15 @@ __ai __attribute__((target("neon"))) uint16x4_t vsqadd_u16(uint16x4_t __p0, int1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float64x2_t vsqrtq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vsqrtq_v(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("neon"))) float64x2_t vsqrtq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vsqrtq_v(__builtin_bit_cast(int8x16_t, 
__rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -62908,36 +64882,36 @@ __ai __attribute__((target("neon"))) float64x2_t vsqrtq_f64(float64x2_t __p0) { #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x4_t vsqrtq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vsqrtq_v(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("neon"))) float32x4_t vsqrtq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vsqrtq_v((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vsqrtq_v(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif __ai __attribute__((target("neon"))) float64x1_t vsqrt_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vsqrt_v(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) float32x2_t vsqrt_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vsqrt_v(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("neon"))) float32x2_t vsqrt_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vsqrt_v((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vsqrt_v(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -62946,35 +64920,35 @@ __ai __attribute__((target("neon"))) float32x2_t vsqrt_f32(float32x2_t __p0) { uint64_t __ret; \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ - __ret = (uint64_t) __builtin_neon_vsrad_n_u64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vsrad_n_u64(__s0, __s1, __p2)); \ __ret; \ }) #define vsrad_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ - __ret = (int64_t) __builtin_neon_vsrad_n_s64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vsrad_n_s64(__s0, __s1, __p2)); \ __ret; \ }) #define vsrid_n_u64(__p0, __p1, __p2) __extension__ ({ \ uint64_t __ret; \ uint64_t __s0 = __p0; \ uint64_t __s1 = __p1; \ - __ret = (uint64_t) __builtin_neon_vsrid_n_u64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vsrid_n_u64(__s0, __s1, __p2)); \ __ret; \ }) #define vsrid_n_s64(__p0, __p1, __p2) __extension__ ({ \ int64_t __ret; \ int64_t __s0 = __p0; \ int64_t __s1 = __p1; \ - __ret = (int64_t) __builtin_neon_vsrid_n_s64(__s0, __s1, __p2); \ + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vsrid_n_s64(__s0, __s1, __p2)); \ __ret; \ }) 
#define vsri_n_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s0 = __p0; \ poly64x1_t __s1 = __p1; \ - __ret = (poly64x1_t) __builtin_neon_vsri_n_v((int8x8_t)__s0, (int8x8_t)__s1, __p2, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vsri_n_v(__builtin_bit_cast(int8x8_t, __s0), __builtin_bit_cast(int8x8_t, __s1), __p2, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ @@ -62982,7 +64956,7 @@ __ai __attribute__((target("neon"))) float32x2_t vsqrt_f32(float32x2_t __p0) { poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__s0, (int8x16_t)__s1, __p2, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 38)); \ __ret; \ }) #else @@ -62990,814 +64964,1154 @@ __ai __attribute__((target("neon"))) float32x2_t vsqrt_f32(float32x2_t __p0) { poly64x2_t __ret; \ poly64x2_t __s0 = __p0; \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (poly64x2_t) __builtin_neon_vsriq_n_v((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vsriq_n_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif #define vst1_p64(__p0, __p1) __extension__ ({ \ poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 6); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 38); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 38); \ }) #else #define vst1q_p64(__p0, __p1) __extension__ ({ \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 38); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__s1, 42); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 42); \ }) #else #define vst1q_f64(__p0, __p1) __extension__ ({ \ float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_v(__p0, (int8x16_t)__rev1, 42); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 42); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __s1 = __p1; \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __s1), 44); \ +}) +#else +#define vst1q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __rev1; __rev1 = 
__builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), 44); \ }) #endif #define vst1_f64(__p0, __p1) __extension__ ({ \ float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_v(__p0, (int8x8_t)__s1, 10); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst1_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __s1 = __p1; \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __s1), 12); \ +}) +#else +#define vst1_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + __builtin_neon_vst1_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), 12); \ +}) +#endif + #define vst1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 6); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 38); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 38); \ }) #else #define vst1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 38); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__s1, __p2, 42); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 42); \ }) #else #define vst1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vst1q_lane_v(__p0, (int8x16_t)__rev1, __p2, 42); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 42); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __s1 = __p1; \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 44); \ +}) +#else +#define vst1q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_8); \ + __builtin_neon_vst1q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 44); \ }) #endif #define vst1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __s1 = __p1; \ - __builtin_neon_vst1_lane_v(__p0, (int8x8_t)__s1, __p2, 10); \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst1_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __s1 = __p1; \ + __builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 12); \ +}) +#else +#define vst1_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_64_8); \ + 
__builtin_neon_vst1_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1), __p2, 12); \ +}) +#endif + #define vst1_p64_x2(__p0, __p1) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x2(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 38); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 38); \ }) #else #define vst1q_p64_x2(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 42); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 42); \ }) #else #define vst1q_f64_x2(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst1q_x2_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 42); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_mf8_x2(__p0, __p1) __extension__ ({ \ + mfloat8x16x2_t __s1 = __p1; \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 44); \ +}) +#else +#define vst1q_mf8_x2(__p0, __p1) __extension__ ({ \ + mfloat8x16x2_t __s1 = __p1; \ + mfloat8x16x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x2_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 44); \ }) #endif #define vst1_f64_x2(__p0, __p1) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst1_x2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 10); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst1_mf8_x2(__p0, __p1) __extension__ ({ \ + mfloat8x8x2_t __s1 = __p1; \ + __builtin_neon_vst1_x2_v(__p0, 
__builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 12); \ +}) +#else +#define vst1_mf8_x2(__p0, __p1) __extension__ ({ \ + mfloat8x8x2_t __s1 = __p1; \ + mfloat8x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst1_x2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 12); \ +}) +#endif + #define vst1_p64_x3(__p0, __p1) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x3(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 38); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 38); \ }) #else #define vst1q_p64_x3(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 42); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 42); \ }) #else #define vst1q_f64_x3(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst1q_x3_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 42); \ +}) +#endif + 
+#ifdef __LITTLE_ENDIAN__ +#define vst1q_mf8_x3(__p0, __p1) __extension__ ({ \ + mfloat8x16x3_t __s1 = __p1; \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 44); \ +}) +#else +#define vst1q_mf8_x3(__p0, __p1) __extension__ ({ \ + mfloat8x16x3_t __s1 = __p1; \ + mfloat8x16x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x3_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 44); \ }) #endif #define vst1_f64_x3(__p0, __p1) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst1_x3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 10); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst1_mf8_x3(__p0, __p1) __extension__ ({ \ + mfloat8x8x3_t __s1 = __p1; \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 12); \ +}) +#else +#define vst1_mf8_x3(__p0, __p1) __extension__ ({ \ + mfloat8x8x3_t __s1 = __p1; \ + mfloat8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst1_x3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 12); \ +}) +#endif + #define vst1_p64_x4(__p0, __p1) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst1q_p64_x4(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 38); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 38); \ }) #else #define vst1q_p64_x4(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 38); \ + 
__rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst1q_f64_x4(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 42); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 42); \ }) #else #define vst1q_f64_x4(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst1q_x4_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 42); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst1q_mf8_x4(__p0, __p1) __extension__ ({ \ + mfloat8x16x4_t __s1 = __p1; \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 44); \ +}) +#else +#define vst1q_mf8_x4(__p0, __p1) __extension__ ({ \ + mfloat8x16x4_t __s1 = __p1; \ + mfloat8x16x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst1q_x4_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 44); \ }) #endif #define vst1_f64_x4(__p0, __p1) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst1_x4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 10); \ + __builtin_neon_vst1_x4_v(__p0, 
__builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst1_mf8_x4(__p0, __p1) __extension__ ({ \ + mfloat8x8x4_t __s1 = __p1; \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 12); \ +}) +#else +#define vst1_mf8_x4(__p0, __p1) __extension__ ({ \ + mfloat8x8x4_t __s1 = __p1; \ + mfloat8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst1_x4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 12); \ +}) +#endif + #define vst2_p64(__p0, __p1) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 6); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2q_p64(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 38); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 38); \ }) #else #define vst2q_p64(__p0, __p1) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_u64(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 51); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 51); \ }) #else #define vst2q_u64(__p0, __p1) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_f64(__p0, __p1) 
__extension__ ({ \ float64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 42); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 42); \ }) #else #define vst2q_f64(__p0, __p1) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_s64(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], 35); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 35); \ }) #else #define vst2q_s64(__p0, __p1) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 35); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst2q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16x2_t __s1 = __p1; \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), 44); \ +}) +#else +#define vst2q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16x2_t __s1 = __p1; \ + mfloat8x16x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst2q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 44); \ }) #endif #define vst2_f64(__p0, __p1) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], 10); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst2_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8x2_t __s1 = __p1; \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), 12); \ +}) +#else +#define vst2_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8x2_t __s1 = __p1; \ + mfloat8x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), 
12); \ +}) +#endif + #define vst2_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 6); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 36); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 36); \ }) #else #define vst2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x2_t __s1 = __p1; \ poly8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 38); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 38); \ }) #else #define vst2q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x2_t __s1 = __p1; \ poly64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 48); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 48); \ }) #else #define vst2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x2_t __s1 = __p1; \ uint8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + 
__builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 51); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 51); \ }) #else #define vst2q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x2_t __s1 = __p1; \ uint64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 32); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 32); \ }) #else #define vst2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x2_t __s1 = __p1; \ int8x16x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 42); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 42); \ }) #else #define vst2q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x2_t __s1 = __p1; \ float64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ - 
__builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], __p2, 35); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 35); \ }) #else #define vst2q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x2_t __s1 = __p1; \ int64x2x2_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __builtin_neon_vst2q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], __p2, 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 35); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst2q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x2_t __s1 = __p1; \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __p2, 44); \ +}) +#else +#define vst2q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x2_t __s1 = __p1; \ + mfloat8x16x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __builtin_neon_vst2q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __p2, 44); \ }) #endif #define vst2_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 19); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 19); \ }) #define vst2_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 10); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 10); \ }) #define vst2_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x2_t __s1 = __p1; \ - __builtin_neon_vst2_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], __p2, 3); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 3); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst2_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x2_t __s1 = __p1; \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __p2, 12); \ +}) +#else +#define vst2_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x2_t __s1 = __p1; \ + mfloat8x8x2_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __builtin_neon_vst2_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __p2, 12); \ +}) +#endif + #define vst3_p64(__p0, __p1) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 6); \ + __builtin_neon_vst3_v(__p0, 
__builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3q_p64(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 38); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 38); \ }) #else #define vst3q_p64(__p0, __p1) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_u64(__p0, __p1) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 51); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 51); \ }) #else #define vst3q_u64(__p0, __p1) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_f64(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 42); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 42); \ }) #else #define vst3q_f64(__p0, __p1) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], 
(int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_s64(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], 35); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 35); \ }) #else #define vst3q_s64(__p0, __p1) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 35); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst3q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16x3_t __s1 = __p1; \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), 44); \ +}) +#else +#define vst3q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16x3_t __s1 = __p1; \ + mfloat8x16x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), 44); \ }) #endif #define vst3_f64(__p0, __p1) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], 10); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst3_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8x3_t __s1 = __p1; \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), 12); \ +}) +#else +#define vst3_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8x3_t __s1 = __p1; \ + mfloat8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = 
__builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), 12); \ +}) +#endif + #define vst3_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 6); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 36); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 36); \ }) #else #define vst3q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x3_t __s1 = __p1; \ poly8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 36); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 38); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 38); \ }) #else #define vst3q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x3_t __s1 = __p1; \ poly64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), 
__builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 48); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 48); \ }) #else #define vst3q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x3_t __s1 = __p1; \ uint8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 48); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 51); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 51); \ }) #else #define vst3q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x3_t __s1 = __p1; \ uint64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 32); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 32); \ }) #else #define vst3q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x3_t __s1 = __p1; \ int8x16x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 
14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 32); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 42); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 42); \ }) #else #define vst3q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x3_t __s1 = __p1; \ float64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], __p2, 35); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 35); \ }) #else #define vst3q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x3_t __s1 = __p1; \ int64x2x3_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __builtin_neon_vst3q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], __p2, 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), 
__builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 35); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst3q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x3_t __s1 = __p1; \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __p2, 44); \ +}) +#else +#define vst3q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x3_t __s1 = __p1; \ + mfloat8x16x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __builtin_neon_vst3q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __p2, 44); \ }) #endif #define vst3_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 19); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 19); \ }) #define vst3_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 10); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 10); \ }) #define vst3_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x3_t __s1 = __p1; \ - __builtin_neon_vst3_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], __p2, 3); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 3); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst3_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x3_t __s1 = __p1; \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __p2, 12); \ +}) +#else +#define vst3_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x3_t __s1 = __p1; \ + mfloat8x8x3_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __builtin_neon_vst3_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __p2, 12); \ +}) +#endif + #define vst4_p64(__p0, __p1) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 6); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4q_p64(__p0, __p1) __extension__ 
({ \ poly64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 38); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 38); \ }) #else #define vst4q_p64(__p0, __p1) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_u64(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 51); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 51); \ }) #else #define vst4q_u64(__p0, __p1) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_f64(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 42); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, 
__s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 42); \ }) #else #define vst4q_f64(__p0, __p1) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_s64(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], 35); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 35); \ }) #else #define vst4q_s64(__p0, __p1) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 35); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst4q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16x4_t __s1 = __p1; \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), 44); \ +}) +#else +#define vst4q_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x16x4_t __s1 = __p1; \ + mfloat8x16x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 
__lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), 44); \ }) #endif #define vst4_f64(__p0, __p1) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], 10); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 10); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst4_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8x4_t __s1 = __p1; \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), 12); \ +}) +#else +#define vst4_mf8(__p0, __p1) __extension__ ({ \ + mfloat8x8x4_t __s1 = __p1; \ + mfloat8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst4_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), 12); \ +}) +#endif + #define vst4_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 6); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 36); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 36); \ }) #else #define vst4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ poly8x16x4_t __s1 = __p1; \ poly8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 36); \ + __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 36); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 38); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 38); \ }) #else #define vst4q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2x4_t __s1 = __p1; \ poly64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 38); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 48); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 48); \ }) #else #define vst4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ uint8x16x4_t __s1 = __p1; \ uint8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 48); \ + __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 48); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 51); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 51); \ }) #else #define vst4q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2x4_t __s1 = __p1; \ uint64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 51); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 32); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 32); \ }) #else #define vst4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ int8x16x4_t __s1 = __p1; \ int8x16x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 32); \ + __rev1.val[0] = 
__builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 32); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 42); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 42); \ }) #else #define vst4q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2x4_t __s1 = __p1; \ float64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 42); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vst4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__s1.val[0], (int8x16_t)__s1.val[1], (int8x16_t)__s1.val[2], (int8x16_t)__s1.val[3], __p2, 35); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 35); \ }) #else #define vst4q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2x4_t __s1 = __p1; \ int64x2x4_t __rev1; \ - __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], 1, 0); \ - __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], 1, 0); \ - __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], 1, 0); \ - __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], 1, 0); \ - __builtin_neon_vst4q_lane_v(__p0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], __p2, 35); \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_64); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_64); \ + __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_64); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_64); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 35); \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vst4q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x4_t __s1 = __p1; \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __s1.val[0]), __builtin_bit_cast(int8x16_t, __s1.val[1]), __builtin_bit_cast(int8x16_t, __s1.val[2]), __builtin_bit_cast(int8x16_t, __s1.val[3]), __p2, 44); \ +}) +#else +#define vst4q_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x16x4_t __s1 = __p1; \ + mfloat8x16x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_128_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_128_8); \ + __rev1.val[2] = __builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_128_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_128_8); \ + __builtin_neon_vst4q_lane_v(__p0, __builtin_bit_cast(int8x16_t, __rev1.val[0]), __builtin_bit_cast(int8x16_t, __rev1.val[1]), __builtin_bit_cast(int8x16_t, __rev1.val[2]), __builtin_bit_cast(int8x16_t, __rev1.val[3]), __p2, 44); \ }) #endif #define vst4_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 19); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 19); \ }) #define vst4_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 10); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 10); \ }) #define vst4_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1x4_t __s1 = __p1; \ - __builtin_neon_vst4_lane_v(__p0, (int8x8_t)__s1.val[0], (int8x8_t)__s1.val[1], (int8x8_t)__s1.val[2], (int8x8_t)__s1.val[3], __p2, 3); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 3); \ }) +#ifdef __LITTLE_ENDIAN__ +#define vst4_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x4_t __s1 = __p1; \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __s1.val[0]), __builtin_bit_cast(int8x8_t, __s1.val[1]), __builtin_bit_cast(int8x8_t, __s1.val[2]), __builtin_bit_cast(int8x8_t, __s1.val[3]), __p2, 12); \ +}) +#else +#define vst4_lane_mf8(__p0, __p1, __p2) __extension__ ({ \ + mfloat8x8x4_t __s1 = __p1; \ + mfloat8x8x4_t __rev1; \ + __rev1.val[0] = __builtin_shufflevector(__s1.val[0], __s1.val[0], __lane_reverse_64_8); \ + __rev1.val[1] = __builtin_shufflevector(__s1.val[1], __s1.val[1], __lane_reverse_64_8); \ + __rev1.val[2] = 
__builtin_shufflevector(__s1.val[2], __s1.val[2], __lane_reverse_64_8); \ + __rev1.val[3] = __builtin_shufflevector(__s1.val[3], __s1.val[3], __lane_reverse_64_8); \ + __builtin_neon_vst4_lane_v(__p0, __builtin_bit_cast(int8x8_t, __rev1.val[0]), __builtin_bit_cast(int8x8_t, __rev1.val[1]), __builtin_bit_cast(int8x8_t, __rev1.val[2]), __builtin_bit_cast(int8x8_t, __rev1.val[3]), __p2, 12); \ +}) +#endif + #define vstrq_p128(__p0, __p1) __extension__ ({ \ poly128_t __s1 = __p1; \ __builtin_neon_vstrq_p128(__p0, __s1); \ }) __ai __attribute__((target("neon"))) uint64_t vsubd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vsubd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vsubd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vsubd_s64(int64_t __p0, int64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vsubd_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vsubd_s64(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ @@ -63809,10 +66123,10 @@ __ai __attribute__((target("neon"))) float64x2_t vsubq_f64(float64x2_t __p0, flo #else __ai __attribute__((target("neon"))) float64x2_t vsubq_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __rev0 - __rev1; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -63831,11 +66145,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vsubhn_high_u32(uint16x4_t __p0, #else __ai __attribute__((target("neon"))) uint16x8_t vsubhn_high_u32(uint16x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint16x8_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vcombine_u16(__rev0, __noswap_vsubhn_u32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -63849,11 +66163,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vsubhn_high_u64(uint32x2_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vsubhn_high_u64(uint32x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint32x4_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = 
__noswap_vcombine_u32(__rev0, __noswap_vsubhn_u64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -63867,11 +66181,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vsubhn_high_u16(uint8x8_t __p0, #else __ai __attribute__((target("neon"))) uint8x16_t vsubhn_high_u16(uint8x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint8x16_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_u8(__rev0, __noswap_vsubhn_u16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -63885,11 +66199,11 @@ __ai __attribute__((target("neon"))) int16x8_t vsubhn_high_s32(int16x4_t __p0, i #else __ai __attribute__((target("neon"))) int16x8_t vsubhn_high_s32(int16x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int16x8_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vcombine_s16(__rev0, __noswap_vsubhn_s32(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -63903,11 +66217,11 @@ __ai __attribute__((target("neon"))) int32x4_t vsubhn_high_s64(int32x2_t __p0, i #else __ai __attribute__((target("neon"))) int32x4_t vsubhn_high_s64(int32x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int32x4_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); __ret = __noswap_vcombine_s32(__rev0, __noswap_vsubhn_s64(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -63921,11 +66235,11 @@ __ai __attribute__((target("neon"))) int8x16_t vsubhn_high_s16(int8x8_t __p0, in #else __ai __attribute__((target("neon"))) int8x16_t vsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int8x16_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 
0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vcombine_s8(__rev0, __noswap_vsubhn_s16(__rev1, __rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -63939,10 +66253,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vsubl_high_u8(uint8x16_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vsubl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vmovl_high_u8(__rev0) - __noswap_vmovl_high_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -63956,10 +66270,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vsubl_high_u32(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vsubl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmovl_high_u32(__rev0) - __noswap_vmovl_high_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -63973,10 +66287,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vsubl_high_u16(uint16x8_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vsubl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmovl_high_u16(__rev0) - __noswap_vmovl_high_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -63990,10 +66304,10 @@ __ai __attribute__((target("neon"))) int16x8_t vsubl_high_s8(int8x16_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vsubl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 
4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vmovl_high_s8(__rev0) - __noswap_vmovl_high_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64007,10 +66321,10 @@ __ai __attribute__((target("neon"))) int64x2_t vsubl_high_s32(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vsubl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmovl_high_s32(__rev0) - __noswap_vmovl_high_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64024,10 +66338,10 @@ __ai __attribute__((target("neon"))) int32x4_t vsubl_high_s16(int16x8_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vsubl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmovl_high_s16(__rev0) - __noswap_vmovl_high_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64041,10 +66355,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vsubw_high_u8(uint16x8_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vsubw_high_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 - __noswap_vmovl_high_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64058,10 +66372,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vsubw_high_u32(uint64x2_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vsubw_high_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); 
__ret = __rev0 - __noswap_vmovl_high_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64075,10 +66389,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vsubw_high_u16(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vsubw_high_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 - __noswap_vmovl_high_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64092,10 +66406,10 @@ __ai __attribute__((target("neon"))) int16x8_t vsubw_high_s8(int16x8_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vsubw_high_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 - __noswap_vmovl_high_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64109,10 +66423,10 @@ __ai __attribute__((target("neon"))) int64x2_t vsubw_high_s32(int64x2_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vsubw_high_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 - __noswap_vmovl_high_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64126,10 +66440,10 @@ __ai __attribute__((target("neon"))) int32x4_t vsubw_high_s16(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vsubw_high_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 - __noswap_vmovl_high_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64143,10 +66457,10 @@ __ai __attribute__((target("neon"))) poly8x8_t vtrn1_p8(poly8x8_t __p0, poly8x8_ #else __ai __attribute__((target("neon"))) poly8x8_t vtrn1_p8(poly8x8_t __p0, poly8x8_t __p1) { 
poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64160,10 +66474,10 @@ __ai __attribute__((target("neon"))) poly16x4_t vtrn1_p16(poly16x4_t __p0, poly1 #else __ai __attribute__((target("neon"))) poly16x4_t vtrn1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -64177,10 +66491,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vtrn1q_p8(poly8x16_t __p0, poly8 #else __ai __attribute__((target("neon"))) poly8x16_t vtrn1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64194,10 +66508,10 @@ __ai __attribute__((target("neon"))) poly64x2_t vtrn1q_p64(poly64x2_t __p0, poly #else __ai __attribute__((target("neon"))) poly64x2_t vtrn1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64211,10 +66525,10 @@ __ai __attribute__((target("neon"))) poly16x8_t vtrn1q_p16(poly16x8_t __p0, poly #else __ai __attribute__((target("neon"))) poly16x8_t vtrn1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64228,10 +66542,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vtrn1q_u8(uint8x16_t __p0, uint8 #else __ai __attribute__((target("neon"))) uint8x16_t vtrn1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64245,10 +66559,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vtrn1q_u32(uint32x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x4_t vtrn1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64262,10 +66576,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vtrn1q_u64(uint64x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint64x2_t vtrn1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64279,10 +66593,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vtrn1q_u16(uint16x8_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x8_t vtrn1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); - __ret 
= __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64296,10 +66610,10 @@ __ai __attribute__((target("neon"))) int8x16_t vtrn1q_s8(int8x16_t __p0, int8x16 #else __ai __attribute__((target("neon"))) int8x16_t vtrn1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64313,10 +66627,10 @@ __ai __attribute__((target("neon"))) float64x2_t vtrn1q_f64(float64x2_t __p0, fl #else __ai __attribute__((target("neon"))) float64x2_t vtrn1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64330,10 +66644,10 @@ __ai __attribute__((target("neon"))) float32x4_t vtrn1q_f32(float32x4_t __p0, fl #else __ai __attribute__((target("neon"))) float32x4_t vtrn1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64347,10 +66661,10 @@ __ai __attribute__((target("neon"))) int32x4_t vtrn1q_s32(int32x4_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x4_t vtrn1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64364,10 +66678,27 @@ __ai __attribute__((target("neon"))) int64x2_t vtrn1q_s64(int64x2_t __p0, int64x #else __ai 
__attribute__((target("neon"))) int64x2_t vtrn1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vtrn1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vtrn1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64381,10 +66712,10 @@ __ai __attribute__((target("neon"))) int16x8_t vtrn1q_s16(int16x8_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x8_t vtrn1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64398,10 +66729,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vtrn1_u8(uint8x8_t __p0, uint8x8_ #else __ai __attribute__((target("neon"))) uint8x8_t vtrn1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64415,10 +66746,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vtrn1_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x2_t vtrn1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -64432,10 +66763,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vtrn1_u16(uint16x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x4_t vtrn1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -64449,10 +66780,10 @@ __ai __attribute__((target("neon"))) int8x8_t vtrn1_s8(int8x8_t __p0, int8x8_t _ #else __ai __attribute__((target("neon"))) int8x8_t vtrn1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64466,10 +66797,10 @@ __ai __attribute__((target("neon"))) float32x2_t vtrn1_f32(float32x2_t __p0, flo #else __ai __attribute__((target("neon"))) float32x2_t vtrn1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -64483,10 +66814,27 @@ __ai __attribute__((target("neon"))) int32x2_t vtrn1_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int32x2_t vtrn1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtrn1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, 
__p1, 0, 8, 2, 10, 4, 12, 6, 14); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtrn1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64500,10 +66848,10 @@ __ai __attribute__((target("neon"))) int16x4_t vtrn1_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int16x4_t vtrn1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -64517,10 +66865,10 @@ __ai __attribute__((target("neon"))) float16x8_t vtrn1q_f16(float16x8_t __p0, fl #else __ai __attribute__((target("neon"))) float16x8_t vtrn1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 2, 10, 4, 12, 6, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64534,10 +66882,10 @@ __ai __attribute__((target("neon"))) float16x4_t vtrn1_f16(float16x4_t __p0, flo #else __ai __attribute__((target("neon"))) float16x4_t vtrn1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 2, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -64551,10 +66899,10 @@ __ai __attribute__((target("neon"))) poly8x8_t vtrn2_p8(poly8x8_t __p0, poly8x8_ #else __ai __attribute__((target("neon"))) poly8x8_t vtrn2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 
11, 5, 13, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64568,10 +66916,10 @@ __ai __attribute__((target("neon"))) poly16x4_t vtrn2_p16(poly16x4_t __p0, poly1 #else __ai __attribute__((target("neon"))) poly16x4_t vtrn2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -64585,10 +66933,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vtrn2q_p8(poly8x16_t __p0, poly8 #else __ai __attribute__((target("neon"))) poly8x16_t vtrn2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64602,10 +66950,10 @@ __ai __attribute__((target("neon"))) poly64x2_t vtrn2q_p64(poly64x2_t __p0, poly #else __ai __attribute__((target("neon"))) poly64x2_t vtrn2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64619,10 +66967,10 @@ __ai __attribute__((target("neon"))) poly16x8_t vtrn2q_p16(poly16x8_t __p0, poly #else __ai __attribute__((target("neon"))) poly16x8_t vtrn2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64636,10 +66984,10 @@ __ai 
__attribute__((target("neon"))) uint8x16_t vtrn2q_u8(uint8x16_t __p0, uint8 #else __ai __attribute__((target("neon"))) uint8x16_t vtrn2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64653,10 +67001,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vtrn2q_u32(uint32x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x4_t vtrn2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64670,10 +67018,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vtrn2q_u64(uint64x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint64x2_t vtrn2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64687,10 +67035,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vtrn2q_u16(uint16x8_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x8_t vtrn2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64704,10 +67052,10 @@ __ai __attribute__((target("neon"))) int8x16_t vtrn2q_s8(int8x16_t __p0, int8x16 #else __ai __attribute__((target("neon"))) int8x16_t vtrn2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64721,10 +67069,10 @@ __ai __attribute__((target("neon"))) float64x2_t vtrn2q_f64(float64x2_t __p0, fl #else __ai __attribute__((target("neon"))) float64x2_t vtrn2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64738,10 +67086,10 @@ __ai __attribute__((target("neon"))) float32x4_t vtrn2q_f32(float32x4_t __p0, fl #else __ai __attribute__((target("neon"))) float32x4_t vtrn2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64755,10 +67103,10 @@ __ai __attribute__((target("neon"))) int32x4_t vtrn2q_s32(int32x4_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x4_t vtrn2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -64772,10 +67120,27 @@ __ai __attribute__((target("neon"))) int64x2_t vtrn2q_s64(int64x2_t __p0, int64x #else __ai __attribute__((target("neon"))) int64x2_t vtrn2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vtrn2q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vtrn2q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -64789,10 +67154,10 @@ __ai __attribute__((target("neon"))) int16x8_t vtrn2q_s16(int16x8_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x8_t vtrn2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64806,10 +67171,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vtrn2_u8(uint8x8_t __p0, uint8x8_ #else __ai __attribute__((target("neon"))) uint8x8_t vtrn2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64823,10 +67188,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vtrn2_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x2_t vtrn2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -64840,10 +67205,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vtrn2_u16(uint16x4_t __p0, uint1 #else __ai 
__attribute__((target("neon"))) uint16x4_t vtrn2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -64857,10 +67222,10 @@ __ai __attribute__((target("neon"))) int8x8_t vtrn2_s8(int8x8_t __p0, int8x8_t _ #else __ai __attribute__((target("neon"))) int8x8_t vtrn2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64874,10 +67239,10 @@ __ai __attribute__((target("neon"))) float32x2_t vtrn2_f32(float32x2_t __p0, flo #else __ai __attribute__((target("neon"))) float32x2_t vtrn2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -64891,10 +67256,27 @@ __ai __attribute__((target("neon"))) int32x2_t vtrn2_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int32x2_t vtrn2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vtrn2_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 9, 3, 11, 5, 13, 7, 15); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vtrn2_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = 
__builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -64908,10 +67290,10 @@ __ai __attribute__((target("neon"))) int16x4_t vtrn2_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int16x4_t vtrn2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -64925,10 +67307,10 @@ __ai __attribute__((target("neon"))) float16x8_t vtrn2q_f16(float16x8_t __p0, fl #else __ai __attribute__((target("neon"))) float16x8_t vtrn2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 9, 3, 11, 5, 13, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -64942,32 +67324,32 @@ __ai __attribute__((target("neon"))) float16x4_t vtrn2_f16(float16x4_t __p0, flo #else __ai __attribute__((target("neon"))) float16x4_t vtrn2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 5, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vtst_p64(poly64x1_t __p0, poly64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vtstq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vtstq_p64(poly64x2_t __p0, poly64x2_t __p1) { uint64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - 
__ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64975,16 +67357,16 @@ __ai __attribute__((target("neon"))) uint64x2_t vtstq_p64(poly64x2_t __p0, poly6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vtstq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vtstq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -64992,73 +67374,73 @@ __ai __attribute__((target("neon"))) uint64x2_t vtstq_u64(uint64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vtstq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vtstq_s64(int64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vtstq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vtstq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("neon"))) uint64x1_t vtst_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } __ai 
__attribute__((target("neon"))) uint64x1_t vtst_s64(int64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; - __ret = (uint64x1_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 19); + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vtst_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 19)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vtstd_u64(uint64_t __p0, uint64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vtstd_u64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vtstd_u64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) uint64_t vtstd_s64(int64_t __p0, int64_t __p1) { uint64_t __ret; - __ret = (uint64_t) __builtin_neon_vtstd_s64(__p0, __p1); + __ret = __builtin_bit_cast(uint64_t, __builtin_neon_vtstd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int8_t vuqaddb_s8(int8_t __p0, uint8_t __p1) { int8_t __ret; - __ret = (int8_t) __builtin_neon_vuqaddb_s8(__p0, __p1); + __ret = __builtin_bit_cast(int8_t, __builtin_neon_vuqaddb_s8(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int32_t vuqadds_s32(int32_t __p0, uint32_t __p1) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vuqadds_s32(__p0, __p1); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vuqadds_s32(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int64_t vuqaddd_s64(int64_t __p0, uint64_t __p1) { int64_t __ret; - __ret = (int64_t) __builtin_neon_vuqaddd_s64(__p0, __p1); + __ret = __builtin_bit_cast(int64_t, __builtin_neon_vuqaddd_s64(__p0, __p1)); return __ret; } __ai __attribute__((target("neon"))) int16_t vuqaddh_s16(int16_t __p0, uint16_t __p1) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vuqaddh_s16(__p0, __p1); + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vuqaddh_s16(__p0, __p1)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 32)); return __ret; } #else __ai __attribute__((target("neon"))) int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65066,16 +67448,16 @@ __ai __attribute__((target("neon"))) int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vuqaddq_s32(int32x4_t __p0, uint32x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 
34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 34)); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vuqaddq_s32(int32x4_t __p0, uint32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -65083,16 +67465,16 @@ __ai __attribute__((target("neon"))) int32x4_t vuqaddq_s32(int32x4_t __p0, uint3 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vuqaddq_s64(int64x2_t __p0, uint64x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 35)); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vuqaddq_s64(int64x2_t __p0, uint64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -65100,16 +67482,16 @@ __ai __attribute__((target("neon"))) int64x2_t vuqaddq_s64(int64x2_t __p0, uint6 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vuqaddq_s16(int16x8_t __p0, uint16x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 33)); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vuqaddq_s16(int16x8_t __p0, uint16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, 
__builtin_neon_vuqaddq_v(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65117,16 +67499,16 @@ __ai __attribute__((target("neon"))) int16x8_t vuqaddq_s16(int16x8_t __p0, uint1 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int8x8_t vuqadd_s8(int8x8_t __p0, uint8x8_t __p1) { int8x8_t __ret; - __ret = (int8x8_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vuqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 0)); return __ret; } #else __ai __attribute__((target("neon"))) int8x8_t vuqadd_s8(int8x8_t __p0, uint8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x8_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int8x8_t, __builtin_neon_vuqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 0)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65134,38 +67516,38 @@ __ai __attribute__((target("neon"))) int8x8_t vuqadd_s8(int8x8_t __p0, uint8x8_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x2_t vuqadd_s32(int32x2_t __p0, uint32x2_t __p1) { int32x2_t __ret; - __ret = (int32x2_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vuqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 2)); return __ret; } #else __ai __attribute__((target("neon"))) int32x2_t vuqadd_s32(int32x2_t __p0, uint32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int32x2_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int32x2_t, __builtin_neon_vuqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 2)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif __ai __attribute__((target("neon"))) int64x1_t vuqadd_s64(int64x1_t __p0, uint64x1_t __p1) { int64x1_t __ret; - __ret = (int64x1_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vuqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 3)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { int16x4_t __ret; - __ret = (int16x4_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vuqadd_v(__builtin_bit_cast(int8x8_t, __p0), __builtin_bit_cast(int8x8_t, __p1), 1)); return 
__ret; } #else __ai __attribute__((target("neon"))) int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int16x4_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int16x4_t, __builtin_neon_vuqadd_v(__builtin_bit_cast(int8x8_t, __rev0), __builtin_bit_cast(int8x8_t, __rev1), 1)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65179,10 +67561,10 @@ __ai __attribute__((target("neon"))) poly8x8_t vuzp1_p8(poly8x8_t __p0, poly8x8_ #else __ai __attribute__((target("neon"))) poly8x8_t vuzp1_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65196,10 +67578,10 @@ __ai __attribute__((target("neon"))) poly16x4_t vuzp1_p16(poly16x4_t __p0, poly1 #else __ai __attribute__((target("neon"))) poly16x4_t vuzp1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65213,10 +67595,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vuzp1q_p8(poly8x16_t __p0, poly8 #else __ai __attribute__((target("neon"))) poly8x16_t vuzp1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65230,10 +67612,10 @@ __ai __attribute__((target("neon"))) poly64x2_t vuzp1q_p64(poly64x2_t __p0, poly #else __ai __attribute__((target("neon"))) poly64x2_t 
vuzp1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -65247,10 +67629,10 @@ __ai __attribute__((target("neon"))) poly16x8_t vuzp1q_p16(poly16x8_t __p0, poly #else __ai __attribute__((target("neon"))) poly16x8_t vuzp1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65264,10 +67646,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vuzp1q_u8(uint8x16_t __p0, uint8 #else __ai __attribute__((target("neon"))) uint8x16_t vuzp1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65281,10 +67663,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vuzp1q_u32(uint32x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x4_t vuzp1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -65298,10 +67680,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vuzp1q_u64(uint64x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint64x2_t vuzp1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -65315,10 +67697,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vuzp1q_u16(uint16x8_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x8_t vuzp1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65332,10 +67714,10 @@ __ai __attribute__((target("neon"))) int8x16_t vuzp1q_s8(int8x16_t __p0, int8x16 #else __ai __attribute__((target("neon"))) int8x16_t vuzp1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65349,10 +67731,10 @@ __ai __attribute__((target("neon"))) float64x2_t vuzp1q_f64(float64x2_t __p0, fl #else __ai __attribute__((target("neon"))) float64x2_t vuzp1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -65366,10 +67748,10 @@ __ai __attribute__((target("neon"))) float32x4_t vuzp1q_f32(float32x4_t __p0, fl #else __ai __attribute__((target("neon"))) float32x4_t vuzp1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -65383,10 +67765,10 @@ __ai __attribute__((target("neon"))) int32x4_t vuzp1q_s32(int32x4_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x4_t vuzp1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -65400,10 +67782,27 @@ __ai __attribute__((target("neon"))) int64x2_t vuzp1q_s64(int64x2_t __p0, int64x #else __ai __attribute__((target("neon"))) int64x2_t vuzp1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vuzp1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vuzp1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65417,10 +67816,10 @@ __ai __attribute__((target("neon"))) int16x8_t vuzp1q_s16(int16x8_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x8_t vuzp1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65434,10 +67833,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vuzp1_u8(uint8x8_t __p0, uint8x8_ #else __ai __attribute__((target("neon"))) uint8x8_t vuzp1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65451,10 +67850,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vuzp1_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x2_t vuzp1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -65468,10 +67867,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vuzp1_u16(uint16x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x4_t vuzp1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65485,10 +67884,10 @@ __ai __attribute__((target("neon"))) int8x8_t vuzp1_s8(int8x8_t __p0, int8x8_t _ #else __ai __attribute__((target("neon"))) int8x8_t vuzp1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65502,10 +67901,10 @@ __ai __attribute__((target("neon"))) float32x2_t vuzp1_f32(float32x2_t __p0, flo #else __ai __attribute__((target("neon"))) float32x2_t vuzp1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -65519,10 +67918,27 @@ __ai __attribute__((target("neon"))) int32x2_t vuzp1_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int32x2_t vuzp1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vuzp1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 2, 4, 6, 8, 10, 12, 14); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vuzp1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65536,10 +67952,10 @@ __ai __attribute__((target("neon"))) int16x4_t vuzp1_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int16x4_t vuzp1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65553,10 +67969,10 @@ __ai __attribute__((target("neon"))) float16x8_t vuzp1q_f16(float16x8_t __p0, fl #else __ai __attribute__((target("neon"))) float16x8_t vuzp1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6, 8, 10, 12, 14); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65570,10 +67986,10 @@ __ai __attribute__((target("neon"))) float16x4_t vuzp1_f16(float16x4_t __p0, flo #else __ai __attribute__((target("neon"))) float16x4_t vuzp1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2, 4, 6); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65587,10 +68003,10 @@ __ai __attribute__((target("neon"))) poly8x8_t vuzp2_p8(poly8x8_t __p0, poly8x8_ #else __ai __attribute__((target("neon"))) poly8x8_t vuzp2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65604,10 +68020,10 @@ __ai __attribute__((target("neon"))) poly16x4_t vuzp2_p16(poly16x4_t __p0, poly1 #else __ai __attribute__((target("neon"))) poly16x4_t vuzp2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65621,10 +68037,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vuzp2q_p8(poly8x16_t __p0, poly8 #else __ai __attribute__((target("neon"))) poly8x16_t vuzp2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65638,10 +68054,10 @@ __ai __attribute__((target("neon"))) poly64x2_t vuzp2q_p64(poly64x2_t __p0, poly #else __ai __attribute__((target("neon"))) poly64x2_t vuzp2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = 
__builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -65655,10 +68071,10 @@ __ai __attribute__((target("neon"))) poly16x8_t vuzp2q_p16(poly16x8_t __p0, poly #else __ai __attribute__((target("neon"))) poly16x8_t vuzp2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65672,10 +68088,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vuzp2q_u8(uint8x16_t __p0, uint8 #else __ai __attribute__((target("neon"))) uint8x16_t vuzp2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65689,10 +68105,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vuzp2q_u32(uint32x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x4_t vuzp2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -65706,10 +68122,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vuzp2q_u64(uint64x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint64x2_t vuzp2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -65723,10 +68139,10 @@ __ai 
__attribute__((target("neon"))) uint16x8_t vuzp2q_u16(uint16x8_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x8_t vuzp2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65740,10 +68156,10 @@ __ai __attribute__((target("neon"))) int8x16_t vuzp2q_s8(int8x16_t __p0, int8x16 #else __ai __attribute__((target("neon"))) int8x16_t vuzp2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65757,10 +68173,10 @@ __ai __attribute__((target("neon"))) float64x2_t vuzp2q_f64(float64x2_t __p0, fl #else __ai __attribute__((target("neon"))) float64x2_t vuzp2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -65774,10 +68190,10 @@ __ai __attribute__((target("neon"))) float32x4_t vuzp2q_f32(float32x4_t __p0, fl #else __ai __attribute__((target("neon"))) float32x4_t vuzp2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -65791,10 +68207,10 @@ __ai __attribute__((target("neon"))) int32x4_t vuzp2q_s32(int32x4_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x4_t vuzp2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -65808,10 +68224,27 @@ __ai __attribute__((target("neon"))) int64x2_t vuzp2q_s64(int64x2_t __p0, int64x #else __ai __attribute__((target("neon"))) int64x2_t vuzp2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vuzp2q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vuzp2q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -65825,10 +68258,10 @@ __ai __attribute__((target("neon"))) int16x8_t vuzp2q_s16(int16x8_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x8_t vuzp2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65842,10 +68275,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vuzp2_u8(uint8x8_t __p0, uint8x8_ #else __ai __attribute__((target("neon"))) uint8x8_t vuzp2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65859,10 +68292,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vuzp2_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x2_t vuzp2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -65876,10 +68309,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vuzp2_u16(uint16x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x4_t vuzp2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65893,10 +68326,10 @@ __ai __attribute__((target("neon"))) int8x8_t vuzp2_s8(int8x8_t __p0, int8x8_t _ #else __ai __attribute__((target("neon"))) int8x8_t vuzp2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65910,10 +68343,10 @@ __ai __attribute__((target("neon"))) float32x2_t vuzp2_f32(float32x2_t __p0, flo #else __ai __attribute__((target("neon"))) float32x2_t vuzp2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -65927,10 +68360,27 @@ __ai __attribute__((target("neon"))) int32x2_t vuzp2_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int32x2_t vuzp2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - 
int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vuzp2_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 1, 3, 5, 7, 9, 11, 13, 15); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vuzp2_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -65944,10 +68394,10 @@ __ai __attribute__((target("neon"))) int16x4_t vuzp2_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int16x4_t vuzp2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -65961,10 +68411,10 @@ __ai __attribute__((target("neon"))) float16x8_t vuzp2q_f16(float16x8_t __p0, fl #else __ai __attribute__((target("neon"))) float16x8_t vuzp2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7, 9, 11, 13, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -65978,10 +68428,10 @@ __ai __attribute__((target("neon"))) float16x4_t vuzp2_f16(float16x4_t __p0, flo #else __ai __attribute__((target("neon"))) float16x4_t vuzp2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3, 5, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; 
} #endif @@ -65995,10 +68445,10 @@ __ai __attribute__((target("neon"))) poly8x8_t vzip1_p8(poly8x8_t __p0, poly8x8_ #else __ai __attribute__((target("neon"))) poly8x8_t vzip1_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66012,10 +68462,10 @@ __ai __attribute__((target("neon"))) poly16x4_t vzip1_p16(poly16x4_t __p0, poly1 #else __ai __attribute__((target("neon"))) poly16x4_t vzip1_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66029,10 +68479,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vzip1q_p8(poly8x16_t __p0, poly8 #else __ai __attribute__((target("neon"))) poly8x16_t vzip1q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66046,10 +68496,10 @@ __ai __attribute__((target("neon"))) poly64x2_t vzip1q_p64(poly64x2_t __p0, poly #else __ai __attribute__((target("neon"))) poly64x2_t vzip1q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -66063,10 +68513,10 @@ __ai __attribute__((target("neon"))) poly16x8_t vzip1q_p16(poly16x8_t __p0, poly #else __ai __attribute__((target("neon"))) poly16x8_t vzip1q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - poly16x8_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66080,10 +68530,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vzip1q_u8(uint8x16_t __p0, uint8 #else __ai __attribute__((target("neon"))) uint8x16_t vzip1q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66097,10 +68547,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vzip1q_u32(uint32x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x4_t vzip1q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -66114,10 +68564,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vzip1q_u64(uint64x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint64x2_t vzip1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -66131,10 +68581,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vzip1q_u16(uint16x8_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x8_t vzip1q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
__lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66148,10 +68598,10 @@ __ai __attribute__((target("neon"))) int8x16_t vzip1q_s8(int8x16_t __p0, int8x16 #else __ai __attribute__((target("neon"))) int8x16_t vzip1q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66165,10 +68615,10 @@ __ai __attribute__((target("neon"))) float64x2_t vzip1q_f64(float64x2_t __p0, fl #else __ai __attribute__((target("neon"))) float64x2_t vzip1q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -66182,10 +68632,10 @@ __ai __attribute__((target("neon"))) float32x4_t vzip1q_f32(float32x4_t __p0, fl #else __ai __attribute__((target("neon"))) float32x4_t vzip1q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -66199,10 +68649,10 @@ __ai __attribute__((target("neon"))) int32x4_t vzip1q_s32(int32x4_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x4_t vzip1q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -66216,10 +68666,27 @@ __ai __attribute__((target("neon"))) int64x2_t vzip1q_s64(int64x2_t __p0, int64x #else __ai __attribute__((target("neon"))) int64x2_t vzip1q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vzip1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vzip1q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66233,10 +68700,10 @@ __ai __attribute__((target("neon"))) int16x8_t vzip1q_s16(int16x8_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x8_t vzip1q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66250,10 +68717,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vzip1_u8(uint8x8_t __p0, uint8x8_ #else __ai __attribute__((target("neon"))) uint8x8_t vzip1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66267,10 +68734,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vzip1_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x2_t vzip1_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 
1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -66284,10 +68751,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vzip1_u16(uint16x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x4_t vzip1_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66301,10 +68768,10 @@ __ai __attribute__((target("neon"))) int8x8_t vzip1_s8(int8x8_t __p0, int8x8_t _ #else __ai __attribute__((target("neon"))) int8x8_t vzip1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66318,10 +68785,10 @@ __ai __attribute__((target("neon"))) float32x2_t vzip1_f32(float32x2_t __p0, flo #else __ai __attribute__((target("neon"))) float32x2_t vzip1_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -66335,10 +68802,27 @@ __ai __attribute__((target("neon"))) int32x2_t vzip1_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int32x2_t vzip1_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); + return __ret; +} 
+#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vzip1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 0, 8, 1, 9, 2, 10, 3, 11); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vzip1_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66352,10 +68836,10 @@ __ai __attribute__((target("neon"))) int16x4_t vzip1_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int16x4_t vzip1_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66369,10 +68853,10 @@ __ai __attribute__((target("neon"))) float16x8_t vzip1q_f16(float16x8_t __p0, fl #else __ai __attribute__((target("neon"))) float16x8_t vzip1q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 8, 1, 9, 2, 10, 3, 11); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66386,10 +68870,10 @@ __ai __attribute__((target("neon"))) float16x4_t vzip1_f16(float16x4_t __p0, flo #else __ai __attribute__((target("neon"))) float16x4_t vzip1_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 0, 4, 1, 5); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66403,10 +68887,10 @@ __ai __attribute__((target("neon"))) poly8x8_t vzip2_p8(poly8x8_t __p0, poly8x8_ #else __ai __attribute__((target("neon"))) poly8x8_t vzip2_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; - poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + poly8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66420,10 +68904,10 @@ __ai __attribute__((target("neon"))) poly16x4_t vzip2_p16(poly16x4_t __p0, poly1 #else __ai __attribute__((target("neon"))) poly16x4_t vzip2_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + poly16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66437,10 +68921,10 @@ __ai __attribute__((target("neon"))) poly8x16_t vzip2q_p8(poly8x16_t __p0, poly8 #else __ai __attribute__((target("neon"))) poly8x16_t vzip2q_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; - poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + poly8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66454,10 +68938,10 @@ __ai __attribute__((target("neon"))) poly64x2_t vzip2q_p64(poly64x2_t __p0, poly #else __ai __attribute__((target("neon"))) poly64x2_t vzip2q_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -66471,10 +68955,10 @@ __ai __attribute__((target("neon"))) poly16x8_t vzip2q_p16(poly16x8_t __p0, poly #else __ai __attribute__((target("neon"))) poly16x8_t vzip2q_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + poly16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); - __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66488,10 +68972,10 @@ __ai __attribute__((target("neon"))) uint8x16_t vzip2q_u8(uint8x16_t __p0, uint8 #else __ai __attribute__((target("neon"))) uint8x16_t vzip2q_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66505,10 +68989,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vzip2q_u32(uint32x4_t __p0, uint #else __ai __attribute__((target("neon"))) uint32x4_t vzip2q_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -66522,10 +69006,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vzip2q_u64(uint64x2_t __p0, uint #else __ai __attribute__((target("neon"))) uint64x2_t vzip2q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -66539,10 +69023,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vzip2q_u16(uint16x8_t __p0, uint #else __ai __attribute__((target("neon"))) uint16x8_t vzip2q_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66556,10 +69040,10 @@ __ai __attribute__((target("neon"))) int8x16_t 
vzip2q_s8(int8x16_t __p0, int8x16 #else __ai __attribute__((target("neon"))) int8x16_t vzip2q_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __builtin_shufflevector(__rev0, __rev1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66573,10 +69057,10 @@ __ai __attribute__((target("neon"))) float64x2_t vzip2q_f64(float64x2_t __p0, fl #else __ai __attribute__((target("neon"))) float64x2_t vzip2q_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -66590,10 +69074,10 @@ __ai __attribute__((target("neon"))) float32x4_t vzip2q_f32(float32x4_t __p0, fl #else __ai __attribute__((target("neon"))) float32x4_t vzip2q_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -66607,10 +69091,10 @@ __ai __attribute__((target("neon"))) int32x4_t vzip2q_s32(int32x4_t __p0, int32x #else __ai __attribute__((target("neon"))) int32x4_t vzip2q_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -66624,10 +69108,27 @@ __ai __attribute__((target("neon"))) int64x2_t vzip2q_s64(int64x2_t __p0, int64x #else __ai __attribute__((target("neon"))) int64x2_t vzip2q_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, 
__p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x16_t vzip2q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x16_t vzip2q_mf8(mfloat8x16_t __p0, mfloat8x16_t __p1) { + mfloat8x16_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66641,10 +69142,10 @@ __ai __attribute__((target("neon"))) int16x8_t vzip2q_s16(int16x8_t __p0, int16x #else __ai __attribute__((target("neon"))) int16x8_t vzip2q_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66658,10 +69159,10 @@ __ai __attribute__((target("neon"))) uint8x8_t vzip2_u8(uint8x8_t __p0, uint8x8_ #else __ai __attribute__((target("neon"))) uint8x8_t vzip2_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66675,10 +69176,10 @@ __ai __attribute__((target("neon"))) uint32x2_t vzip2_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x2_t vzip2_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_64_32); return __ret; } #endif @@ -66692,10 +69193,10 @@ __ai __attribute__((target("neon"))) uint16x4_t vzip2_u16(uint16x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x4_t vzip2_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66709,10 +69210,10 @@ __ai __attribute__((target("neon"))) int8x8_t vzip2_s8(int8x8_t __p0, int8x8_t _ #else __ai __attribute__((target("neon"))) int8x8_t vzip2_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66726,10 +69227,10 @@ __ai __attribute__((target("neon"))) float32x2_t vzip2_f32(float32x2_t __p0, flo #else __ai __attribute__((target("neon"))) float32x2_t vzip2_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -66743,10 +69244,27 @@ __ai __attribute__((target("neon"))) int32x2_t vzip2_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int32x2_t vzip2_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __builtin_shufflevector(__rev0, __rev1, 1, 3); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon"))) mfloat8x8_t vzip2_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + __ret = __builtin_shufflevector(__p0, __p1, 4, 12, 5, 13, 6, 14, 7, 15); + return __ret; +} +#else +__ai __attribute__((target("neon"))) mfloat8x8_t vzip2_mf8(mfloat8x8_t __p0, mfloat8x8_t __p1) { + mfloat8x8_t __ret; + mfloat8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -66760,10 +69278,10 @@ __ai __attribute__((target("neon"))) int16x4_t vzip2_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int16x4_t vzip2_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66777,10 +69295,10 @@ __ai __attribute__((target("neon"))) float16x8_t vzip2q_f16(float16x8_t __p0, fl #else __ai __attribute__((target("neon"))) float16x8_t vzip2q_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; - float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __builtin_shufflevector(__rev0, __rev1, 4, 12, 5, 13, 6, 14, 7, 15); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -66794,10 +69312,10 @@ __ai __attribute__((target("neon"))) float16x4_t vzip2_f16(float16x4_t __p0, flo #else __ai __attribute__((target("neon"))) float16x4_t vzip2_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; - float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __builtin_shufflevector(__rev0, __rev1, 2, 6, 3, 7); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -66805,23 +69323,23 @@ __ai __attribute__((target("neon"))) float16x4_t vzip2_f16(float16x4_t __p0, flo #define vldap1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __ret; \ poly64x1_t __s1 = __p1; \ - __ret = (poly64x1_t) __builtin_neon_vldap1_lane_p64(__p0, (int8x8_t)__s1, __p2, 6); \ + __ret = __builtin_bit_cast(poly64x1_t, __builtin_neon_vldap1_lane_p64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 6)); \ __ret; \ }) #ifdef __LITTLE_ENDIAN__ #define vldap1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ - __ret = (poly64x2_t) __builtin_neon_vldap1q_lane_p64(__p0, (int8x16_t)__s1, __p2, 38); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vldap1q_lane_p64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 38)); \ __ret; \ }) #else #define 
vldap1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __ret; \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (poly64x2_t) __builtin_neon_vldap1q_lane_p64(__p0, (int8x16_t)__rev1, __p2, 38); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(poly64x2_t, __builtin_neon_vldap1q_lane_p64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 38)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -66830,16 +69348,16 @@ __ai __attribute__((target("neon"))) float16x4_t vzip2_f16(float16x4_t __p0, flo #define vldap1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vldap1q_lane_u64(__p0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vldap1q_lane_u64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else #define vldap1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __ret; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vldap1q_lane_u64(__p0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vldap1q_lane_u64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -66848,16 +69366,16 @@ __ai __attribute__((target("neon"))) float16x4_t vzip2_f16(float16x4_t __p0, flo #define vldap1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s1 = __p1; \ - __ret = (float64x2_t) __builtin_neon_vldap1q_lane_f64(__p0, (int8x16_t)__s1, __p2, 42); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vldap1q_lane_f64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 42)); \ __ret; \ }) #else #define vldap1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __ret; \ float64x2_t __s1 = __p1; \ - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (float64x2_t) __builtin_neon_vldap1q_lane_f64(__p0, (int8x16_t)__rev1, __p2, 42); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vldap1q_lane_f64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 42)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -66866,16 +69384,16 @@ __ai __attribute__((target("neon"))) float16x4_t vzip2_f16(float16x4_t __p0, flo #define vldap1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ - __ret = (int64x2_t) __builtin_neon_vldap1q_lane_s64(__p0, (int8x16_t)__s1, __p2, 35); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vldap1q_lane_s64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 35)); \ __ret; \ }) #else #define vldap1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __ret; \ int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (int64x2_t) __builtin_neon_vldap1q_lane_s64(__p0, 
(int8x16_t)__rev1, __p2, 35); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vldap1q_lane_s64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 35)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -66883,103 +69401,103 @@ __ai __attribute__((target("neon"))) float16x4_t vzip2_f16(float16x4_t __p0, flo #define vldap1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __ret; \ uint64x1_t __s1 = __p1; \ - __ret = (uint64x1_t) __builtin_neon_vldap1_lane_u64(__p0, (int8x8_t)__s1, __p2, 19); \ + __ret = __builtin_bit_cast(uint64x1_t, __builtin_neon_vldap1_lane_u64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 19)); \ __ret; \ }) #define vldap1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __ret; \ float64x1_t __s1 = __p1; \ - __ret = (float64x1_t) __builtin_neon_vldap1_lane_f64(__p0, (int8x8_t)__s1, __p2, 10); \ + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vldap1_lane_f64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 10)); \ __ret; \ }) #define vldap1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __ret; \ int64x1_t __s1 = __p1; \ - __ret = (int64x1_t) __builtin_neon_vldap1_lane_s64(__p0, (int8x8_t)__s1, __p2, 3); \ + __ret = __builtin_bit_cast(int64x1_t, __builtin_neon_vldap1_lane_s64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 3)); \ __ret; \ }) #define vstl1_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x1_t __s1 = __p1; \ - __builtin_neon_vstl1_lane_p64(__p0, (int8x8_t)__s1, __p2, 6); \ + __builtin_neon_vstl1_lane_p64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 6); \ }) #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ - __builtin_neon_vstl1q_lane_p64(__p0, (int8x16_t)__s1, __p2, 38); \ + __builtin_neon_vstl1q_lane_p64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 38); \ }) #else #define vstl1q_lane_p64(__p0, __p1, __p2) __extension__ ({ \ poly64x2_t __s1 = __p1; \ - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vstl1q_lane_p64(__p0, (int8x16_t)__rev1, __p2, 38); \ + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vstl1q_lane_p64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 38); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - __builtin_neon_vstl1q_lane_u64(__p0, (int8x16_t)__s1, __p2, 51); \ + __builtin_neon_vstl1q_lane_u64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 51); \ }) #else #define vstl1q_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vstl1q_lane_u64(__p0, (int8x16_t)__rev1, __p2, 51); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vstl1q_lane_u64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 51); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ - __builtin_neon_vstl1q_lane_f64(__p0, (int8x16_t)__s1, __p2, 42); \ + __builtin_neon_vstl1q_lane_f64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 42); \ }) #else #define vstl1q_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x2_t __s1 = __p1; \ - float64x2_t 
__rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vstl1q_lane_f64(__p0, (int8x16_t)__rev1, __p2, 42); \ + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vstl1q_lane_f64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 42); \ }) #endif #ifdef __LITTLE_ENDIAN__ #define vstl1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ - __builtin_neon_vstl1q_lane_s64(__p0, (int8x16_t)__s1, __p2, 35); \ + __builtin_neon_vstl1q_lane_s64(__p0, __builtin_bit_cast(int8x16_t, __s1), __p2, 35); \ }) #else #define vstl1q_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x2_t __s1 = __p1; \ - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __builtin_neon_vstl1q_lane_s64(__p0, (int8x16_t)__rev1, __p2, 35); \ + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __builtin_neon_vstl1q_lane_s64(__p0, __builtin_bit_cast(int8x16_t, __rev1), __p2, 35); \ }) #endif #define vstl1_lane_u64(__p0, __p1, __p2) __extension__ ({ \ uint64x1_t __s1 = __p1; \ - __builtin_neon_vstl1_lane_u64(__p0, (int8x8_t)__s1, __p2, 19); \ + __builtin_neon_vstl1_lane_u64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 19); \ }) #define vstl1_lane_f64(__p0, __p1, __p2) __extension__ ({ \ float64x1_t __s1 = __p1; \ - __builtin_neon_vstl1_lane_f64(__p0, (int8x8_t)__s1, __p2, 10); \ + __builtin_neon_vstl1_lane_f64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 10); \ }) #define vstl1_lane_s64(__p0, __p1, __p2) __extension__ ({ \ int64x1_t __s1 = __p1; \ - __builtin_neon_vstl1_lane_s64(__p0, (int8x8_t)__s1, __p2, 3); \ + __builtin_neon_vstl1_lane_s64(__p0, __builtin_bit_cast(int8x8_t, __s1), __p2, 3); \ }) #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint8x16_t vbcaxq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_vbcaxq_u8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vbcaxq_u8(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 48)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint8x16_t vbcaxq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_vbcaxq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_vbcaxq_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -66987,17 +69505,17 @@ __ai __attribute__((target("sha3,neon"))) 
uint8x16_t vbcaxq_u8(uint8x16_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint32x4_t vbcaxq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vbcaxq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vbcaxq_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint32x4_t vbcaxq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vbcaxq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vbcaxq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67005,17 +69523,17 @@ __ai __attribute__((target("sha3,neon"))) uint32x4_t vbcaxq_u32(uint32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint64x2_t vbcaxq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vbcaxq_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vbcaxq_u64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 51)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint64x2_t vbcaxq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vbcaxq_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vbcaxq_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67023,17 +69541,17 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vbcaxq_u64(uint64x2_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint16x8_t vbcaxq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - __ret = (uint16x8_t) 
__builtin_neon_vbcaxq_u16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vbcaxq_u16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 49)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint16x8_t vbcaxq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_vbcaxq_u16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_vbcaxq_u16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -67041,17 +69559,17 @@ __ai __attribute__((target("sha3,neon"))) uint16x8_t vbcaxq_u16(uint16x8_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int8x16_t vbcaxq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_vbcaxq_s8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vbcaxq_s8(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 32)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) int8x16_t vbcaxq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_vbcaxq_s8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_vbcaxq_s8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -67059,17 +69577,17 @@ __ai __attribute__((target("sha3,neon"))) int8x16_t vbcaxq_s8(int8x16_t __p0, in #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int32x4_t vbcaxq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) 
__builtin_neon_vbcaxq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vbcaxq_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) int32x4_t vbcaxq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_vbcaxq_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vbcaxq_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67077,17 +69595,17 @@ __ai __attribute__((target("sha3,neon"))) int32x4_t vbcaxq_s32(int32x4_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int64x2_t vbcaxq_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_vbcaxq_s64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vbcaxq_s64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 35)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) int64x2_t vbcaxq_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int64x2_t) __builtin_neon_vbcaxq_s64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_vbcaxq_s64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67095,17 +69613,17 @@ __ai __attribute__((target("sha3,neon"))) int64x2_t vbcaxq_s64(int64x2_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int16x8_t vbcaxq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_vbcaxq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vbcaxq_s16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 33)); 
return __ret; } #else __ai __attribute__((target("sha3,neon"))) int16x8_t vbcaxq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_vbcaxq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_vbcaxq_s16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -67113,17 +69631,17 @@ __ai __attribute__((target("sha3,neon"))) int16x8_t vbcaxq_s16(int16x8_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint8x16_t veor3q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - __ret = (uint8x16_t) __builtin_neon_veor3q_u8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_veor3q_u8(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 48)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint8x16_t veor3q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint8x16_t) __builtin_neon_veor3q_u8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 48); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(uint8x16_t, __builtin_neon_veor3q_u8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 48)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -67131,17 +69649,17 @@ __ai __attribute__((target("sha3,neon"))) uint8x16_t veor3q_u8(uint8x16_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint32x4_t veor3q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_veor3q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_veor3q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return 
__ret; } #else __ai __attribute__((target("sha3,neon"))) uint32x4_t veor3q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_veor3q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_veor3q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67149,17 +69667,17 @@ __ai __attribute__((target("sha3,neon"))) uint32x4_t veor3q_u32(uint32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint64x2_t veor3q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_veor3q_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_veor3q_u64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 51)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint64x2_t veor3q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_veor3q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_veor3q_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67167,17 +69685,17 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t veor3q_u64(uint64x2_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint16x8_t veor3q_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_veor3q_u16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_veor3q_u16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 49)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint16x8_t veor3q_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 
4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t) __builtin_neon_veor3q_u16((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(uint16x8_t, __builtin_neon_veor3q_u16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 49)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -67185,17 +69703,17 @@ __ai __attribute__((target("sha3,neon"))) uint16x8_t veor3q_u16(uint16x8_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int8x16_t veor3q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - __ret = (int8x16_t) __builtin_neon_veor3q_s8((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_veor3q_s8(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 32)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) int8x16_t veor3q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int8x16_t) __builtin_neon_veor3q_s8((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); + __ret = __builtin_bit_cast(int8x16_t, __builtin_neon_veor3q_s8(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 32)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -67203,17 +69721,17 @@ __ai __attribute__((target("sha3,neon"))) int8x16_t veor3q_s8(int8x16_t __p0, in #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int32x4_t veor3q_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - __ret = (int32x4_t) __builtin_neon_veor3q_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_veor3q_s32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 34)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) int32x4_t veor3q_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (int32x4_t) __builtin_neon_veor3q_s32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 34); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_veor3q_s32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 34)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67221,17 +69739,17 @@ __ai __attribute__((target("sha3,neon"))) int32x4_t veor3q_s32(int32x4_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int64x2_t veor3q_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; - __ret = (int64x2_t) __builtin_neon_veor3q_s64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_veor3q_s64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 35)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) int64x2_t veor3q_s64(int64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (int64x2_t) __builtin_neon_veor3q_s64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 35); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + int64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(int64x2_t, __builtin_neon_veor3q_s64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 35)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67239,17 +69757,17 @@ __ai __attribute__((target("sha3,neon"))) int64x2_t veor3q_s64(int64x2_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) int16x8_t veor3q_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - __ret = (int16x8_t) __builtin_neon_veor3q_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_veor3q_s16(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 33)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) int16x8_t veor3q_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t) __builtin_neon_veor3q_s16((int8x16_t)__rev0, (int8x16_t)__rev1, 
(int8x16_t)__rev2, 33); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); + __ret = __builtin_bit_cast(int16x8_t, __builtin_neon_veor3q_s16(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 33)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -67257,16 +69775,16 @@ __ai __attribute__((target("sha3,neon"))) int16x8_t veor3q_s16(int16x8_t __p0, i #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint64x2_t vrax1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vrax1q_u64((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrax1q_u64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint64x2_t vrax1q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vrax1q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vrax1q_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67274,17 +69792,17 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vrax1q_u64(uint64x2_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512hq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vsha512hq_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512hq_u64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 51)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512hq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vsha512hq_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512hq_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, 
__rev2), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67292,17 +69810,17 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512hq_u64(uint64x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512h2q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vsha512h2q_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512h2q_u64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 51)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512h2q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vsha512h2q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512h2q_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67310,16 +69828,16 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512h2q_u64(uint64x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su0q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vsha512su0q_u64((int8x16_t)__p0, (int8x16_t)__p1, 51); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512su0q_u64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 51)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su0q_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vsha512su0q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512su0q_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67327,17 +69845,17 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su0q_u64(uint64x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su1q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - __ret = (uint64x2_t) __builtin_neon_vsha512su1q_u64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); + __ret = 
__builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512su1q_u64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 51)); return __ret; } #else __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su1q_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (uint64x2_t) __builtin_neon_vsha512su1q_u64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 51); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + uint64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vsha512su1q_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 51)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67347,7 +69865,7 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su1q_u64(uint64x2_t uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - __ret = (uint64x2_t) __builtin_neon_vxarq_u64((int8x16_t)__s0, (int8x16_t)__s1, __p2, 51); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vxarq_u64(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __p2, 51)); \ __ret; \ }) #else @@ -67355,10 +69873,10 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su1q_u64(uint64x2_t uint64x2_t __ret; \ uint64x2_t __s0 = __p0; \ uint64x2_t __s1 = __p1; \ - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 1, 0); \ - __ret = (uint64x2_t) __builtin_neon_vxarq_u64((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 51); \ - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_64); \ + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_64); \ + __ret = __builtin_bit_cast(uint64x2_t, __builtin_neon_vxarq_u64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __p2, 51)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); \ __ret; \ }) #endif @@ -67366,17 +69884,17 @@ __ai __attribute__((target("sha3,neon"))) uint64x2_t vsha512su1q_u64(uint64x2_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3partw1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsm3partw1q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3partw1q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3partw1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t 
__rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsm3partw1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3partw1q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67384,17 +69902,17 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3partw1q_u32(uint32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3partw2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsm3partw2q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3partw2q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3partw2q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsm3partw2q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3partw2q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67402,17 +69920,17 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3partw2q_u32(uint32x4_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsm3ss1q_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3ss1q_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 50)); return __ret; } #else __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsm3ss1q_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 
(int8x16_t)__rev2, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3ss1q_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67423,7 +69941,7 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt1aq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt1aq_u32(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 50)); \ __ret; \ }) #else @@ -67432,11 +69950,11 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt1aq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt1aq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -67447,7 +69965,7 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt1bq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt1bq_u32(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 50)); \ __ret; \ }) #else @@ -67456,11 +69974,11 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt1bq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ 
+ uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt1bq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -67471,7 +69989,7 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt2aq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt2aq_u32(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 50)); \ __ret; \ }) #else @@ -67480,11 +69998,11 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt2aq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, __lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt2aq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -67495,7 +70013,7 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt2bq_u32((int8x16_t)__s0, (int8x16_t)__s1, (int8x16_t)__s2, __p3, 50); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt2bq_u32(__builtin_bit_cast(int8x16_t, __s0), __builtin_bit_cast(int8x16_t, __s1), __builtin_bit_cast(int8x16_t, __s2), __p3, 50)); \ __ret; \ }) #else @@ -67504,11 +70022,11 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 uint32x4_t __s0 = __p0; \ uint32x4_t __s1 = __p1; \ uint32x4_t __s2 = __p2; \ - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 3, 2, 1, 0); \ - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 3, 2, 1, 0); \ - __ret = (uint32x4_t) __builtin_neon_vsm3tt2bq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, __p3, 50); \ - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, __lane_reverse_128_32); \ + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, __lane_reverse_128_32); \ + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 
__lane_reverse_128_32); \ + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm3tt2bq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), __p3, 50)); \ + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); \ __ret; \ }) #endif @@ -67516,16 +70034,16 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm3ss1q_u32(uint32x4_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm4eq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsm4eq_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm4eq_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm4eq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsm4eq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm4eq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -67533,413 +70051,413 @@ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm4eq_u32(uint32x4_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm4ekeyq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t) __builtin_neon_vsm4ekeyq_u32((int8x16_t)__p0, (int8x16_t)__p1, 50); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm4ekeyq_u32(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 50)); return __ret; } #else __ai __attribute__((target("sm4,neon"))) uint32x4_t vsm4ekeyq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t) __builtin_neon_vsm4ekeyq_u32((int8x16_t)__rev0, (int8x16_t)__rev1, 50); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + __ret = __builtin_bit_cast(uint32x4_t, __builtin_neon_vsm4ekeyq_u32(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 50)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif __ai __attribute__((target("v8.1a,neon"))) int32_t vqrdmlahs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmlahs_s32(__p0, __p1, __p2); + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqrdmlahs_s32(__p0, __p1, __p2)); return __ret; } __ai __attribute__((target("v8.1a,neon"))) int16_t vqrdmlahh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmlahh_s16(__p0, __p1, __p2); + __ret 
= __builtin_bit_cast(int16_t, __builtin_neon_vqrdmlahh_s16(__p0, __p1, __p2)); return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahs_lane_s32(__p0_760, __p1_760, __p2_760, __p3_760) __extension__ ({ \ - int32_t __ret_760; \ - int32_t __s0_760 = __p0_760; \ - int32_t __s1_760 = __p1_760; \ - int32x2_t __s2_760 = __p2_760; \ - __ret_760 = vqrdmlahs_s32(__s0_760, __s1_760, vget_lane_s32(__s2_760, __p3_760)); \ - __ret_760; \ -}) -#else -#define vqrdmlahs_lane_s32(__p0_761, __p1_761, __p2_761, __p3_761) __extension__ ({ \ - int32_t __ret_761; \ - int32_t __s0_761 = __p0_761; \ - int32_t __s1_761 = __p1_761; \ - int32x2_t __s2_761 = __p2_761; \ - int32x2_t __rev2_761; __rev2_761 = __builtin_shufflevector(__s2_761, __s2_761, 1, 0); \ - __ret_761 = vqrdmlahs_s32(__s0_761, __s1_761, __noswap_vget_lane_s32(__rev2_761, __p3_761)); \ - __ret_761; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahh_lane_s16(__p0_762, __p1_762, __p2_762, __p3_762) __extension__ ({ \ - int16_t __ret_762; \ - int16_t __s0_762 = __p0_762; \ - int16_t __s1_762 = __p1_762; \ - int16x4_t __s2_762 = __p2_762; \ - __ret_762 = vqrdmlahh_s16(__s0_762, __s1_762, vget_lane_s16(__s2_762, __p3_762)); \ - __ret_762; \ -}) -#else -#define vqrdmlahh_lane_s16(__p0_763, __p1_763, __p2_763, __p3_763) __extension__ ({ \ - int16_t __ret_763; \ - int16_t __s0_763 = __p0_763; \ - int16_t __s1_763 = __p1_763; \ - int16x4_t __s2_763 = __p2_763; \ - int16x4_t __rev2_763; __rev2_763 = __builtin_shufflevector(__s2_763, __s2_763, 3, 2, 1, 0); \ - __ret_763 = vqrdmlahh_s16(__s0_763, __s1_763, __noswap_vget_lane_s16(__rev2_763, __p3_763)); \ - __ret_763; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahs_laneq_s32(__p0_764, __p1_764, __p2_764, __p3_764) __extension__ ({ \ - int32_t __ret_764; \ - int32_t __s0_764 = __p0_764; \ - int32_t __s1_764 = __p1_764; \ - int32x4_t __s2_764 = __p2_764; \ - __ret_764 = vqrdmlahs_s32(__s0_764, __s1_764, vgetq_lane_s32(__s2_764, __p3_764)); \ - __ret_764; \ -}) -#else -#define vqrdmlahs_laneq_s32(__p0_765, __p1_765, __p2_765, __p3_765) __extension__ ({ \ - int32_t __ret_765; \ - int32_t __s0_765 = __p0_765; \ - int32_t __s1_765 = __p1_765; \ - int32x4_t __s2_765 = __p2_765; \ - int32x4_t __rev2_765; __rev2_765 = __builtin_shufflevector(__s2_765, __s2_765, 3, 2, 1, 0); \ - __ret_765 = vqrdmlahs_s32(__s0_765, __s1_765, __noswap_vgetq_lane_s32(__rev2_765, __p3_765)); \ - __ret_765; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahh_laneq_s16(__p0_766, __p1_766, __p2_766, __p3_766) __extension__ ({ \ - int16_t __ret_766; \ - int16_t __s0_766 = __p0_766; \ - int16_t __s1_766 = __p1_766; \ - int16x8_t __s2_766 = __p2_766; \ - __ret_766 = vqrdmlahh_s16(__s0_766, __s1_766, vgetq_lane_s16(__s2_766, __p3_766)); \ - __ret_766; \ -}) -#else -#define vqrdmlahh_laneq_s16(__p0_767, __p1_767, __p2_767, __p3_767) __extension__ ({ \ - int16_t __ret_767; \ - int16_t __s0_767 = __p0_767; \ - int16_t __s1_767 = __p1_767; \ - int16x8_t __s2_767 = __p2_767; \ - int16x8_t __rev2_767; __rev2_767 = __builtin_shufflevector(__s2_767, __s2_767, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_767 = vqrdmlahh_s16(__s0_767, __s1_767, __noswap_vgetq_lane_s16(__rev2_767, __p3_767)); \ - __ret_767; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_laneq_s32(__p0_768, __p1_768, __p2_768, __p3_768) __extension__ ({ \ - int32x4_t __ret_768; \ - int32x4_t __s0_768 = __p0_768; \ - int32x4_t __s1_768 = __p1_768; \ - int32x4_t __s2_768 = __p2_768; \ - __ret_768 = vqrdmlahq_s32(__s0_768, __s1_768, splatq_laneq_s32(__s2_768, 
__p3_768)); \ - __ret_768; \ -}) -#else -#define vqrdmlahq_laneq_s32(__p0_769, __p1_769, __p2_769, __p3_769) __extension__ ({ \ - int32x4_t __ret_769; \ - int32x4_t __s0_769 = __p0_769; \ - int32x4_t __s1_769 = __p1_769; \ - int32x4_t __s2_769 = __p2_769; \ - int32x4_t __rev0_769; __rev0_769 = __builtin_shufflevector(__s0_769, __s0_769, 3, 2, 1, 0); \ - int32x4_t __rev1_769; __rev1_769 = __builtin_shufflevector(__s1_769, __s1_769, 3, 2, 1, 0); \ - int32x4_t __rev2_769; __rev2_769 = __builtin_shufflevector(__s2_769, __s2_769, 3, 2, 1, 0); \ - __ret_769 = __noswap_vqrdmlahq_s32(__rev0_769, __rev1_769, __noswap_splatq_laneq_s32(__rev2_769, __p3_769)); \ - __ret_769 = __builtin_shufflevector(__ret_769, __ret_769, 3, 2, 1, 0); \ - __ret_769; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_laneq_s16(__p0_770, __p1_770, __p2_770, __p3_770) __extension__ ({ \ - int16x8_t __ret_770; \ - int16x8_t __s0_770 = __p0_770; \ - int16x8_t __s1_770 = __p1_770; \ - int16x8_t __s2_770 = __p2_770; \ - __ret_770 = vqrdmlahq_s16(__s0_770, __s1_770, splatq_laneq_s16(__s2_770, __p3_770)); \ - __ret_770; \ -}) -#else -#define vqrdmlahq_laneq_s16(__p0_771, __p1_771, __p2_771, __p3_771) __extension__ ({ \ - int16x8_t __ret_771; \ - int16x8_t __s0_771 = __p0_771; \ - int16x8_t __s1_771 = __p1_771; \ - int16x8_t __s2_771 = __p2_771; \ - int16x8_t __rev0_771; __rev0_771 = __builtin_shufflevector(__s0_771, __s0_771, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_771; __rev1_771 = __builtin_shufflevector(__s1_771, __s1_771, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_771; __rev2_771 = __builtin_shufflevector(__s2_771, __s2_771, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_771 = __noswap_vqrdmlahq_s16(__rev0_771, __rev1_771, __noswap_splatq_laneq_s16(__rev2_771, __p3_771)); \ - __ret_771 = __builtin_shufflevector(__ret_771, __ret_771, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_771; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_laneq_s32(__p0_772, __p1_772, __p2_772, __p3_772) __extension__ ({ \ - int32x2_t __ret_772; \ - int32x2_t __s0_772 = __p0_772; \ - int32x2_t __s1_772 = __p1_772; \ - int32x4_t __s2_772 = __p2_772; \ - __ret_772 = vqrdmlah_s32(__s0_772, __s1_772, splat_laneq_s32(__s2_772, __p3_772)); \ - __ret_772; \ -}) -#else -#define vqrdmlah_laneq_s32(__p0_773, __p1_773, __p2_773, __p3_773) __extension__ ({ \ - int32x2_t __ret_773; \ - int32x2_t __s0_773 = __p0_773; \ - int32x2_t __s1_773 = __p1_773; \ - int32x4_t __s2_773 = __p2_773; \ - int32x2_t __rev0_773; __rev0_773 = __builtin_shufflevector(__s0_773, __s0_773, 1, 0); \ - int32x2_t __rev1_773; __rev1_773 = __builtin_shufflevector(__s1_773, __s1_773, 1, 0); \ - int32x4_t __rev2_773; __rev2_773 = __builtin_shufflevector(__s2_773, __s2_773, 3, 2, 1, 0); \ - __ret_773 = __noswap_vqrdmlah_s32(__rev0_773, __rev1_773, __noswap_splat_laneq_s32(__rev2_773, __p3_773)); \ - __ret_773 = __builtin_shufflevector(__ret_773, __ret_773, 1, 0); \ - __ret_773; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_laneq_s16(__p0_774, __p1_774, __p2_774, __p3_774) __extension__ ({ \ - int16x4_t __ret_774; \ - int16x4_t __s0_774 = __p0_774; \ - int16x4_t __s1_774 = __p1_774; \ - int16x8_t __s2_774 = __p2_774; \ - __ret_774 = vqrdmlah_s16(__s0_774, __s1_774, splat_laneq_s16(__s2_774, __p3_774)); \ - __ret_774; \ -}) -#else -#define vqrdmlah_laneq_s16(__p0_775, __p1_775, __p2_775, __p3_775) __extension__ ({ \ - int16x4_t __ret_775; \ - int16x4_t __s0_775 = __p0_775; \ - int16x4_t __s1_775 = __p1_775; \ - int16x8_t __s2_775 = __p2_775; \ - int16x4_t __rev0_775; 
__rev0_775 = __builtin_shufflevector(__s0_775, __s0_775, 3, 2, 1, 0); \ - int16x4_t __rev1_775; __rev1_775 = __builtin_shufflevector(__s1_775, __s1_775, 3, 2, 1, 0); \ - int16x8_t __rev2_775; __rev2_775 = __builtin_shufflevector(__s2_775, __s2_775, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_775 = __noswap_vqrdmlah_s16(__rev0_775, __rev1_775, __noswap_splat_laneq_s16(__rev2_775, __p3_775)); \ - __ret_775 = __builtin_shufflevector(__ret_775, __ret_775, 3, 2, 1, 0); \ - __ret_775; \ -}) -#endif - -__ai __attribute__((target("v8.1a,neon"))) int32_t vqrdmlshs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqrdmlshs_s32(__p0, __p1, __p2); - return __ret; -} -__ai __attribute__((target("v8.1a,neon"))) int16_t vqrdmlshh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqrdmlshh_s16(__p0, __p1, __p2); - return __ret; -} -#ifdef __LITTLE_ENDIAN__ -#define vqrdmlshs_lane_s32(__p0_776, __p1_776, __p2_776, __p3_776) __extension__ ({ \ +#define vqrdmlahs_lane_s32(__p0_776, __p1_776, __p2_776, __p3_776) __extension__ ({ \ int32_t __ret_776; \ int32_t __s0_776 = __p0_776; \ int32_t __s1_776 = __p1_776; \ int32x2_t __s2_776 = __p2_776; \ - __ret_776 = vqrdmlshs_s32(__s0_776, __s1_776, vget_lane_s32(__s2_776, __p3_776)); \ + __ret_776 = vqrdmlahs_s32(__s0_776, __s1_776, vget_lane_s32(__s2_776, __p3_776)); \ __ret_776; \ }) #else -#define vqrdmlshs_lane_s32(__p0_777, __p1_777, __p2_777, __p3_777) __extension__ ({ \ +#define vqrdmlahs_lane_s32(__p0_777, __p1_777, __p2_777, __p3_777) __extension__ ({ \ int32_t __ret_777; \ int32_t __s0_777 = __p0_777; \ int32_t __s1_777 = __p1_777; \ int32x2_t __s2_777 = __p2_777; \ - int32x2_t __rev2_777; __rev2_777 = __builtin_shufflevector(__s2_777, __s2_777, 1, 0); \ - __ret_777 = vqrdmlshs_s32(__s0_777, __s1_777, __noswap_vget_lane_s32(__rev2_777, __p3_777)); \ + int32x2_t __rev2_777; __rev2_777 = __builtin_shufflevector(__s2_777, __s2_777, __lane_reverse_64_32); \ + __ret_777 = vqrdmlahs_s32(__s0_777, __s1_777, __noswap_vget_lane_s32(__rev2_777, __p3_777)); \ __ret_777; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshh_lane_s16(__p0_778, __p1_778, __p2_778, __p3_778) __extension__ ({ \ +#define vqrdmlahh_lane_s16(__p0_778, __p1_778, __p2_778, __p3_778) __extension__ ({ \ int16_t __ret_778; \ int16_t __s0_778 = __p0_778; \ int16_t __s1_778 = __p1_778; \ int16x4_t __s2_778 = __p2_778; \ - __ret_778 = vqrdmlshh_s16(__s0_778, __s1_778, vget_lane_s16(__s2_778, __p3_778)); \ + __ret_778 = vqrdmlahh_s16(__s0_778, __s1_778, vget_lane_s16(__s2_778, __p3_778)); \ __ret_778; \ }) #else -#define vqrdmlshh_lane_s16(__p0_779, __p1_779, __p2_779, __p3_779) __extension__ ({ \ +#define vqrdmlahh_lane_s16(__p0_779, __p1_779, __p2_779, __p3_779) __extension__ ({ \ int16_t __ret_779; \ int16_t __s0_779 = __p0_779; \ int16_t __s1_779 = __p1_779; \ int16x4_t __s2_779 = __p2_779; \ - int16x4_t __rev2_779; __rev2_779 = __builtin_shufflevector(__s2_779, __s2_779, 3, 2, 1, 0); \ - __ret_779 = vqrdmlshh_s16(__s0_779, __s1_779, __noswap_vget_lane_s16(__rev2_779, __p3_779)); \ + int16x4_t __rev2_779; __rev2_779 = __builtin_shufflevector(__s2_779, __s2_779, __lane_reverse_64_16); \ + __ret_779 = vqrdmlahh_s16(__s0_779, __s1_779, __noswap_vget_lane_s16(__rev2_779, __p3_779)); \ __ret_779; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshs_laneq_s32(__p0_780, __p1_780, __p2_780, __p3_780) __extension__ ({ \ +#define vqrdmlahs_laneq_s32(__p0_780, __p1_780, __p2_780, __p3_780) __extension__ ({ \ 
int32_t __ret_780; \ int32_t __s0_780 = __p0_780; \ int32_t __s1_780 = __p1_780; \ int32x4_t __s2_780 = __p2_780; \ - __ret_780 = vqrdmlshs_s32(__s0_780, __s1_780, vgetq_lane_s32(__s2_780, __p3_780)); \ + __ret_780 = vqrdmlahs_s32(__s0_780, __s1_780, vgetq_lane_s32(__s2_780, __p3_780)); \ __ret_780; \ }) #else -#define vqrdmlshs_laneq_s32(__p0_781, __p1_781, __p2_781, __p3_781) __extension__ ({ \ +#define vqrdmlahs_laneq_s32(__p0_781, __p1_781, __p2_781, __p3_781) __extension__ ({ \ int32_t __ret_781; \ int32_t __s0_781 = __p0_781; \ int32_t __s1_781 = __p1_781; \ int32x4_t __s2_781 = __p2_781; \ - int32x4_t __rev2_781; __rev2_781 = __builtin_shufflevector(__s2_781, __s2_781, 3, 2, 1, 0); \ - __ret_781 = vqrdmlshs_s32(__s0_781, __s1_781, __noswap_vgetq_lane_s32(__rev2_781, __p3_781)); \ + int32x4_t __rev2_781; __rev2_781 = __builtin_shufflevector(__s2_781, __s2_781, __lane_reverse_128_32); \ + __ret_781 = vqrdmlahs_s32(__s0_781, __s1_781, __noswap_vgetq_lane_s32(__rev2_781, __p3_781)); \ __ret_781; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshh_laneq_s16(__p0_782, __p1_782, __p2_782, __p3_782) __extension__ ({ \ +#define vqrdmlahh_laneq_s16(__p0_782, __p1_782, __p2_782, __p3_782) __extension__ ({ \ int16_t __ret_782; \ int16_t __s0_782 = __p0_782; \ int16_t __s1_782 = __p1_782; \ int16x8_t __s2_782 = __p2_782; \ - __ret_782 = vqrdmlshh_s16(__s0_782, __s1_782, vgetq_lane_s16(__s2_782, __p3_782)); \ + __ret_782 = vqrdmlahh_s16(__s0_782, __s1_782, vgetq_lane_s16(__s2_782, __p3_782)); \ __ret_782; \ }) #else -#define vqrdmlshh_laneq_s16(__p0_783, __p1_783, __p2_783, __p3_783) __extension__ ({ \ +#define vqrdmlahh_laneq_s16(__p0_783, __p1_783, __p2_783, __p3_783) __extension__ ({ \ int16_t __ret_783; \ int16_t __s0_783 = __p0_783; \ int16_t __s1_783 = __p1_783; \ int16x8_t __s2_783 = __p2_783; \ - int16x8_t __rev2_783; __rev2_783 = __builtin_shufflevector(__s2_783, __s2_783, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_783 = vqrdmlshh_s16(__s0_783, __s1_783, __noswap_vgetq_lane_s16(__rev2_783, __p3_783)); \ + int16x8_t __rev2_783; __rev2_783 = __builtin_shufflevector(__s2_783, __s2_783, __lane_reverse_128_16); \ + __ret_783 = vqrdmlahh_s16(__s0_783, __s1_783, __noswap_vgetq_lane_s16(__rev2_783, __p3_783)); \ __ret_783; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_laneq_s32(__p0_784, __p1_784, __p2_784, __p3_784) __extension__ ({ \ +#define vqrdmlahq_laneq_s32(__p0_784, __p1_784, __p2_784, __p3_784) __extension__ ({ \ int32x4_t __ret_784; \ int32x4_t __s0_784 = __p0_784; \ int32x4_t __s1_784 = __p1_784; \ int32x4_t __s2_784 = __p2_784; \ - __ret_784 = vqrdmlshq_s32(__s0_784, __s1_784, splatq_laneq_s32(__s2_784, __p3_784)); \ + __ret_784 = vqrdmlahq_s32(__s0_784, __s1_784, splatq_laneq_s32(__s2_784, __p3_784)); \ __ret_784; \ }) #else -#define vqrdmlshq_laneq_s32(__p0_785, __p1_785, __p2_785, __p3_785) __extension__ ({ \ +#define vqrdmlahq_laneq_s32(__p0_785, __p1_785, __p2_785, __p3_785) __extension__ ({ \ int32x4_t __ret_785; \ int32x4_t __s0_785 = __p0_785; \ int32x4_t __s1_785 = __p1_785; \ int32x4_t __s2_785 = __p2_785; \ - int32x4_t __rev0_785; __rev0_785 = __builtin_shufflevector(__s0_785, __s0_785, 3, 2, 1, 0); \ - int32x4_t __rev1_785; __rev1_785 = __builtin_shufflevector(__s1_785, __s1_785, 3, 2, 1, 0); \ - int32x4_t __rev2_785; __rev2_785 = __builtin_shufflevector(__s2_785, __s2_785, 3, 2, 1, 0); \ - __ret_785 = __noswap_vqrdmlshq_s32(__rev0_785, __rev1_785, __noswap_splatq_laneq_s32(__rev2_785, __p3_785)); \ - __ret_785 = __builtin_shufflevector(__ret_785, __ret_785, 3, 2, 
1, 0); \ + int32x4_t __rev0_785; __rev0_785 = __builtin_shufflevector(__s0_785, __s0_785, __lane_reverse_128_32); \ + int32x4_t __rev1_785; __rev1_785 = __builtin_shufflevector(__s1_785, __s1_785, __lane_reverse_128_32); \ + int32x4_t __rev2_785; __rev2_785 = __builtin_shufflevector(__s2_785, __s2_785, __lane_reverse_128_32); \ + __ret_785 = __noswap_vqrdmlahq_s32(__rev0_785, __rev1_785, __noswap_splatq_laneq_s32(__rev2_785, __p3_785)); \ + __ret_785 = __builtin_shufflevector(__ret_785, __ret_785, __lane_reverse_128_32); \ __ret_785; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_laneq_s16(__p0_786, __p1_786, __p2_786, __p3_786) __extension__ ({ \ +#define vqrdmlahq_laneq_s16(__p0_786, __p1_786, __p2_786, __p3_786) __extension__ ({ \ int16x8_t __ret_786; \ int16x8_t __s0_786 = __p0_786; \ int16x8_t __s1_786 = __p1_786; \ int16x8_t __s2_786 = __p2_786; \ - __ret_786 = vqrdmlshq_s16(__s0_786, __s1_786, splatq_laneq_s16(__s2_786, __p3_786)); \ + __ret_786 = vqrdmlahq_s16(__s0_786, __s1_786, splatq_laneq_s16(__s2_786, __p3_786)); \ __ret_786; \ }) #else -#define vqrdmlshq_laneq_s16(__p0_787, __p1_787, __p2_787, __p3_787) __extension__ ({ \ +#define vqrdmlahq_laneq_s16(__p0_787, __p1_787, __p2_787, __p3_787) __extension__ ({ \ int16x8_t __ret_787; \ int16x8_t __s0_787 = __p0_787; \ int16x8_t __s1_787 = __p1_787; \ int16x8_t __s2_787 = __p2_787; \ - int16x8_t __rev0_787; __rev0_787 = __builtin_shufflevector(__s0_787, __s0_787, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_787; __rev1_787 = __builtin_shufflevector(__s1_787, __s1_787, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_787; __rev2_787 = __builtin_shufflevector(__s2_787, __s2_787, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_787 = __noswap_vqrdmlshq_s16(__rev0_787, __rev1_787, __noswap_splatq_laneq_s16(__rev2_787, __p3_787)); \ - __ret_787 = __builtin_shufflevector(__ret_787, __ret_787, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev0_787; __rev0_787 = __builtin_shufflevector(__s0_787, __s0_787, __lane_reverse_128_16); \ + int16x8_t __rev1_787; __rev1_787 = __builtin_shufflevector(__s1_787, __s1_787, __lane_reverse_128_16); \ + int16x8_t __rev2_787; __rev2_787 = __builtin_shufflevector(__s2_787, __s2_787, __lane_reverse_128_16); \ + __ret_787 = __noswap_vqrdmlahq_s16(__rev0_787, __rev1_787, __noswap_splatq_laneq_s16(__rev2_787, __p3_787)); \ + __ret_787 = __builtin_shufflevector(__ret_787, __ret_787, __lane_reverse_128_16); \ __ret_787; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_laneq_s32(__p0_788, __p1_788, __p2_788, __p3_788) __extension__ ({ \ +#define vqrdmlah_laneq_s32(__p0_788, __p1_788, __p2_788, __p3_788) __extension__ ({ \ int32x2_t __ret_788; \ int32x2_t __s0_788 = __p0_788; \ int32x2_t __s1_788 = __p1_788; \ int32x4_t __s2_788 = __p2_788; \ - __ret_788 = vqrdmlsh_s32(__s0_788, __s1_788, splat_laneq_s32(__s2_788, __p3_788)); \ + __ret_788 = vqrdmlah_s32(__s0_788, __s1_788, splat_laneq_s32(__s2_788, __p3_788)); \ __ret_788; \ }) #else -#define vqrdmlsh_laneq_s32(__p0_789, __p1_789, __p2_789, __p3_789) __extension__ ({ \ +#define vqrdmlah_laneq_s32(__p0_789, __p1_789, __p2_789, __p3_789) __extension__ ({ \ int32x2_t __ret_789; \ int32x2_t __s0_789 = __p0_789; \ int32x2_t __s1_789 = __p1_789; \ int32x4_t __s2_789 = __p2_789; \ - int32x2_t __rev0_789; __rev0_789 = __builtin_shufflevector(__s0_789, __s0_789, 1, 0); \ - int32x2_t __rev1_789; __rev1_789 = __builtin_shufflevector(__s1_789, __s1_789, 1, 0); \ - int32x4_t __rev2_789; __rev2_789 = __builtin_shufflevector(__s2_789, __s2_789, 3, 2, 1, 0); \ - __ret_789 = 
__noswap_vqrdmlsh_s32(__rev0_789, __rev1_789, __noswap_splat_laneq_s32(__rev2_789, __p3_789)); \ - __ret_789 = __builtin_shufflevector(__ret_789, __ret_789, 1, 0); \ + int32x2_t __rev0_789; __rev0_789 = __builtin_shufflevector(__s0_789, __s0_789, __lane_reverse_64_32); \ + int32x2_t __rev1_789; __rev1_789 = __builtin_shufflevector(__s1_789, __s1_789, __lane_reverse_64_32); \ + int32x4_t __rev2_789; __rev2_789 = __builtin_shufflevector(__s2_789, __s2_789, __lane_reverse_128_32); \ + __ret_789 = __noswap_vqrdmlah_s32(__rev0_789, __rev1_789, __noswap_splat_laneq_s32(__rev2_789, __p3_789)); \ + __ret_789 = __builtin_shufflevector(__ret_789, __ret_789, __lane_reverse_64_32); \ __ret_789; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_laneq_s16(__p0_790, __p1_790, __p2_790, __p3_790) __extension__ ({ \ +#define vqrdmlah_laneq_s16(__p0_790, __p1_790, __p2_790, __p3_790) __extension__ ({ \ int16x4_t __ret_790; \ int16x4_t __s0_790 = __p0_790; \ int16x4_t __s1_790 = __p1_790; \ int16x8_t __s2_790 = __p2_790; \ - __ret_790 = vqrdmlsh_s16(__s0_790, __s1_790, splat_laneq_s16(__s2_790, __p3_790)); \ + __ret_790 = vqrdmlah_s16(__s0_790, __s1_790, splat_laneq_s16(__s2_790, __p3_790)); \ __ret_790; \ }) #else -#define vqrdmlsh_laneq_s16(__p0_791, __p1_791, __p2_791, __p3_791) __extension__ ({ \ +#define vqrdmlah_laneq_s16(__p0_791, __p1_791, __p2_791, __p3_791) __extension__ ({ \ int16x4_t __ret_791; \ int16x4_t __s0_791 = __p0_791; \ int16x4_t __s1_791 = __p1_791; \ int16x8_t __s2_791 = __p2_791; \ - int16x4_t __rev0_791; __rev0_791 = __builtin_shufflevector(__s0_791, __s0_791, 3, 2, 1, 0); \ - int16x4_t __rev1_791; __rev1_791 = __builtin_shufflevector(__s1_791, __s1_791, 3, 2, 1, 0); \ - int16x8_t __rev2_791; __rev2_791 = __builtin_shufflevector(__s2_791, __s2_791, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_791 = __noswap_vqrdmlsh_s16(__rev0_791, __rev1_791, __noswap_splat_laneq_s16(__rev2_791, __p3_791)); \ - __ret_791 = __builtin_shufflevector(__ret_791, __ret_791, 3, 2, 1, 0); \ + int16x4_t __rev0_791; __rev0_791 = __builtin_shufflevector(__s0_791, __s0_791, __lane_reverse_64_16); \ + int16x4_t __rev1_791; __rev1_791 = __builtin_shufflevector(__s1_791, __s1_791, __lane_reverse_64_16); \ + int16x8_t __rev2_791; __rev2_791 = __builtin_shufflevector(__s2_791, __s2_791, __lane_reverse_128_16); \ + __ret_791 = __noswap_vqrdmlah_s16(__rev0_791, __rev1_791, __noswap_splat_laneq_s16(__rev2_791, __p3_791)); \ + __ret_791 = __builtin_shufflevector(__ret_791, __ret_791, __lane_reverse_64_16); \ __ret_791; \ }) #endif +__ai __attribute__((target("v8.1a,neon"))) int32_t vqrdmlshs_s32(int32_t __p0, int32_t __p1, int32_t __p2) { + int32_t __ret; + __ret = __builtin_bit_cast(int32_t, __builtin_neon_vqrdmlshs_s32(__p0, __p1, __p2)); + return __ret; +} +__ai __attribute__((target("v8.1a,neon"))) int16_t vqrdmlshh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { + int16_t __ret; + __ret = __builtin_bit_cast(int16_t, __builtin_neon_vqrdmlshh_s16(__p0, __p1, __p2)); + return __ret; +} +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshs_lane_s32(__p0_792, __p1_792, __p2_792, __p3_792) __extension__ ({ \ + int32_t __ret_792; \ + int32_t __s0_792 = __p0_792; \ + int32_t __s1_792 = __p1_792; \ + int32x2_t __s2_792 = __p2_792; \ + __ret_792 = vqrdmlshs_s32(__s0_792, __s1_792, vget_lane_s32(__s2_792, __p3_792)); \ + __ret_792; \ +}) +#else +#define vqrdmlshs_lane_s32(__p0_793, __p1_793, __p2_793, __p3_793) __extension__ ({ \ + int32_t __ret_793; \ + int32_t __s0_793 = __p0_793; \ + int32_t __s1_793 = __p1_793; \ + int32x2_t __s2_793 = 
__p2_793; \ + int32x2_t __rev2_793; __rev2_793 = __builtin_shufflevector(__s2_793, __s2_793, __lane_reverse_64_32); \ + __ret_793 = vqrdmlshs_s32(__s0_793, __s1_793, __noswap_vget_lane_s32(__rev2_793, __p3_793)); \ + __ret_793; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshh_lane_s16(__p0_794, __p1_794, __p2_794, __p3_794) __extension__ ({ \ + int16_t __ret_794; \ + int16_t __s0_794 = __p0_794; \ + int16_t __s1_794 = __p1_794; \ + int16x4_t __s2_794 = __p2_794; \ + __ret_794 = vqrdmlshh_s16(__s0_794, __s1_794, vget_lane_s16(__s2_794, __p3_794)); \ + __ret_794; \ +}) +#else +#define vqrdmlshh_lane_s16(__p0_795, __p1_795, __p2_795, __p3_795) __extension__ ({ \ + int16_t __ret_795; \ + int16_t __s0_795 = __p0_795; \ + int16_t __s1_795 = __p1_795; \ + int16x4_t __s2_795 = __p2_795; \ + int16x4_t __rev2_795; __rev2_795 = __builtin_shufflevector(__s2_795, __s2_795, __lane_reverse_64_16); \ + __ret_795 = vqrdmlshh_s16(__s0_795, __s1_795, __noswap_vget_lane_s16(__rev2_795, __p3_795)); \ + __ret_795; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshs_laneq_s32(__p0_796, __p1_796, __p2_796, __p3_796) __extension__ ({ \ + int32_t __ret_796; \ + int32_t __s0_796 = __p0_796; \ + int32_t __s1_796 = __p1_796; \ + int32x4_t __s2_796 = __p2_796; \ + __ret_796 = vqrdmlshs_s32(__s0_796, __s1_796, vgetq_lane_s32(__s2_796, __p3_796)); \ + __ret_796; \ +}) +#else +#define vqrdmlshs_laneq_s32(__p0_797, __p1_797, __p2_797, __p3_797) __extension__ ({ \ + int32_t __ret_797; \ + int32_t __s0_797 = __p0_797; \ + int32_t __s1_797 = __p1_797; \ + int32x4_t __s2_797 = __p2_797; \ + int32x4_t __rev2_797; __rev2_797 = __builtin_shufflevector(__s2_797, __s2_797, __lane_reverse_128_32); \ + __ret_797 = vqrdmlshs_s32(__s0_797, __s1_797, __noswap_vgetq_lane_s32(__rev2_797, __p3_797)); \ + __ret_797; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshh_laneq_s16(__p0_798, __p1_798, __p2_798, __p3_798) __extension__ ({ \ + int16_t __ret_798; \ + int16_t __s0_798 = __p0_798; \ + int16_t __s1_798 = __p1_798; \ + int16x8_t __s2_798 = __p2_798; \ + __ret_798 = vqrdmlshh_s16(__s0_798, __s1_798, vgetq_lane_s16(__s2_798, __p3_798)); \ + __ret_798; \ +}) +#else +#define vqrdmlshh_laneq_s16(__p0_799, __p1_799, __p2_799, __p3_799) __extension__ ({ \ + int16_t __ret_799; \ + int16_t __s0_799 = __p0_799; \ + int16_t __s1_799 = __p1_799; \ + int16x8_t __s2_799 = __p2_799; \ + int16x8_t __rev2_799; __rev2_799 = __builtin_shufflevector(__s2_799, __s2_799, __lane_reverse_128_16); \ + __ret_799 = vqrdmlshh_s16(__s0_799, __s1_799, __noswap_vgetq_lane_s16(__rev2_799, __p3_799)); \ + __ret_799; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshq_laneq_s32(__p0_800, __p1_800, __p2_800, __p3_800) __extension__ ({ \ + int32x4_t __ret_800; \ + int32x4_t __s0_800 = __p0_800; \ + int32x4_t __s1_800 = __p1_800; \ + int32x4_t __s2_800 = __p2_800; \ + __ret_800 = vqrdmlshq_s32(__s0_800, __s1_800, splatq_laneq_s32(__s2_800, __p3_800)); \ + __ret_800; \ +}) +#else +#define vqrdmlshq_laneq_s32(__p0_801, __p1_801, __p2_801, __p3_801) __extension__ ({ \ + int32x4_t __ret_801; \ + int32x4_t __s0_801 = __p0_801; \ + int32x4_t __s1_801 = __p1_801; \ + int32x4_t __s2_801 = __p2_801; \ + int32x4_t __rev0_801; __rev0_801 = __builtin_shufflevector(__s0_801, __s0_801, __lane_reverse_128_32); \ + int32x4_t __rev1_801; __rev1_801 = __builtin_shufflevector(__s1_801, __s1_801, __lane_reverse_128_32); \ + int32x4_t __rev2_801; __rev2_801 = __builtin_shufflevector(__s2_801, __s2_801, __lane_reverse_128_32); \ + __ret_801 = 
__noswap_vqrdmlshq_s32(__rev0_801, __rev1_801, __noswap_splatq_laneq_s32(__rev2_801, __p3_801)); \ + __ret_801 = __builtin_shufflevector(__ret_801, __ret_801, __lane_reverse_128_32); \ + __ret_801; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlshq_laneq_s16(__p0_802, __p1_802, __p2_802, __p3_802) __extension__ ({ \ + int16x8_t __ret_802; \ + int16x8_t __s0_802 = __p0_802; \ + int16x8_t __s1_802 = __p1_802; \ + int16x8_t __s2_802 = __p2_802; \ + __ret_802 = vqrdmlshq_s16(__s0_802, __s1_802, splatq_laneq_s16(__s2_802, __p3_802)); \ + __ret_802; \ +}) +#else +#define vqrdmlshq_laneq_s16(__p0_803, __p1_803, __p2_803, __p3_803) __extension__ ({ \ + int16x8_t __ret_803; \ + int16x8_t __s0_803 = __p0_803; \ + int16x8_t __s1_803 = __p1_803; \ + int16x8_t __s2_803 = __p2_803; \ + int16x8_t __rev0_803; __rev0_803 = __builtin_shufflevector(__s0_803, __s0_803, __lane_reverse_128_16); \ + int16x8_t __rev1_803; __rev1_803 = __builtin_shufflevector(__s1_803, __s1_803, __lane_reverse_128_16); \ + int16x8_t __rev2_803; __rev2_803 = __builtin_shufflevector(__s2_803, __s2_803, __lane_reverse_128_16); \ + __ret_803 = __noswap_vqrdmlshq_s16(__rev0_803, __rev1_803, __noswap_splatq_laneq_s16(__rev2_803, __p3_803)); \ + __ret_803 = __builtin_shufflevector(__ret_803, __ret_803, __lane_reverse_128_16); \ + __ret_803; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlsh_laneq_s32(__p0_804, __p1_804, __p2_804, __p3_804) __extension__ ({ \ + int32x2_t __ret_804; \ + int32x2_t __s0_804 = __p0_804; \ + int32x2_t __s1_804 = __p1_804; \ + int32x4_t __s2_804 = __p2_804; \ + __ret_804 = vqrdmlsh_s32(__s0_804, __s1_804, splat_laneq_s32(__s2_804, __p3_804)); \ + __ret_804; \ +}) +#else +#define vqrdmlsh_laneq_s32(__p0_805, __p1_805, __p2_805, __p3_805) __extension__ ({ \ + int32x2_t __ret_805; \ + int32x2_t __s0_805 = __p0_805; \ + int32x2_t __s1_805 = __p1_805; \ + int32x4_t __s2_805 = __p2_805; \ + int32x2_t __rev0_805; __rev0_805 = __builtin_shufflevector(__s0_805, __s0_805, __lane_reverse_64_32); \ + int32x2_t __rev1_805; __rev1_805 = __builtin_shufflevector(__s1_805, __s1_805, __lane_reverse_64_32); \ + int32x4_t __rev2_805; __rev2_805 = __builtin_shufflevector(__s2_805, __s2_805, __lane_reverse_128_32); \ + __ret_805 = __noswap_vqrdmlsh_s32(__rev0_805, __rev1_805, __noswap_splat_laneq_s32(__rev2_805, __p3_805)); \ + __ret_805 = __builtin_shufflevector(__ret_805, __ret_805, __lane_reverse_64_32); \ + __ret_805; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vqrdmlsh_laneq_s16(__p0_806, __p1_806, __p2_806, __p3_806) __extension__ ({ \ + int16x4_t __ret_806; \ + int16x4_t __s0_806 = __p0_806; \ + int16x4_t __s1_806 = __p1_806; \ + int16x8_t __s2_806 = __p2_806; \ + __ret_806 = vqrdmlsh_s16(__s0_806, __s1_806, splat_laneq_s16(__s2_806, __p3_806)); \ + __ret_806; \ +}) +#else +#define vqrdmlsh_laneq_s16(__p0_807, __p1_807, __p2_807, __p3_807) __extension__ ({ \ + int16x4_t __ret_807; \ + int16x4_t __s0_807 = __p0_807; \ + int16x4_t __s1_807 = __p1_807; \ + int16x8_t __s2_807 = __p2_807; \ + int16x4_t __rev0_807; __rev0_807 = __builtin_shufflevector(__s0_807, __s0_807, __lane_reverse_64_16); \ + int16x4_t __rev1_807; __rev1_807 = __builtin_shufflevector(__s1_807, __s1_807, __lane_reverse_64_16); \ + int16x8_t __rev2_807; __rev2_807 = __builtin_shufflevector(__s2_807, __s2_807, __lane_reverse_128_16); \ + __ret_807 = __noswap_vqrdmlsh_s16(__rev0_807, __rev1_807, __noswap_splat_laneq_s16(__rev2_807, __p3_807)); \ + __ret_807 = __builtin_shufflevector(__ret_807, __ret_807, __lane_reverse_64_16); \ + 
__ret_807; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcaddq_rot270_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcaddq_rot270_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcaddq_rot270_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcaddq_rot270_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcaddq_rot270_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcaddq_rot270_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67947,16 +70465,16 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcaddq_rot270_f64(float64 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcaddq_rot90_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcaddq_rot90_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcaddq_rot90_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), 42)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcaddq_rot90_f64(float64x2_t __p0, float64x2_t __p1) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcaddq_rot90_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcaddq_rot90_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67964,17 +70482,17 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcaddq_rot90_f64(float64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 42)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcmlaq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -67982,17 +70500,17 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_f64(float64x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_rot180_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 42)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_rot180_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -68000,17 +70518,17 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot180_f64(float64 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_rot270_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 42)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = 
(float64x2_t) __builtin_neon_vcmlaq_rot270_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_rot270_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -68018,17 +70536,17 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot270_f64(float64 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_rot90_f64(__builtin_bit_cast(int8x16_t, __p0), __builtin_bit_cast(int8x16_t, __p1), __builtin_bit_cast(int8x16_t, __p2), 42)); return __ret; } #else __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_f64((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vcmlaq_rot90_f64(__builtin_bit_cast(int8x16_t, __rev0), __builtin_bit_cast(int8x16_t, __rev1), __builtin_bit_cast(int8x16_t, __rev2), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -68036,15 +70554,15 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot90_f64(float64x #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd32xq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrnd32xq_f32((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd32xq_f32(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd32xq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrnd32xq_f32((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd32xq_f32(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -68052,15 +70570,15 @@ __ai 
__attribute__((target("v8.5a,neon"))) float32x4_t vrnd32xq_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd32x_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnd32x_f32((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd32x_f32(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd32x_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrnd32x_f32((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd32x_f32(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -68068,36 +70586,36 @@ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd32x_f32(float32x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd32xq_f64(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd32xq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrnd32xq_f64((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd32xq_f64(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("v8.5a,neon"))) float64x1_t vrnd32x_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnd32x_f64((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrnd32x_f64(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrnd32zq_f32((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd32zq_f32(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd32zq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrnd32zq_f32((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd32zq_f32(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -68105,15 +70623,15 @@ __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd32zq_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd32z_f32(float32x2_t 
__p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnd32z_f32((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd32z_f32(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd32z_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrnd32z_f32((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd32z_f32(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -68121,36 +70639,36 @@ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd32z_f32(float32x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd32zq_f64(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd32zq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrnd32zq_f64((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd32zq_f64(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("v8.5a,neon"))) float64x1_t vrnd32z_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnd32z_f64((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrnd32z_f64(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrnd64xq_f32((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd64xq_f32(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd64xq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrnd64xq_f32((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd64xq_f32(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -68158,15 +70676,15 @@ __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd64xq_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd64x_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnd64x_f32((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, 
__builtin_neon_vrnd64x_f32(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd64x_f32(float32x2_t __p0) { float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrnd64x_f32((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd64x_f32(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -68174,36 +70692,36 @@ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd64x_f32(float32x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd64xq_f64(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd64xq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrnd64xq_f64((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd64xq_f64(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("v8.5a,neon"))) float64x1_t vrnd64x_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnd64x_f64((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrnd64x_f64(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) { float32x4_t __ret; - __ret = (float32x4_t) __builtin_neon_vrnd64zq_f32((int8x16_t)__p0, 41); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd64zq_f32(__builtin_bit_cast(int8x16_t, __p0), 41)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd64zq_f32(float32x4_t __p0) { float32x4_t __ret; - float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (float32x4_t) __builtin_neon_vrnd64zq_f32((int8x16_t)__rev0, 41); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + __ret = __builtin_bit_cast(float32x4_t, __builtin_neon_vrnd64zq_f32(__builtin_bit_cast(int8x16_t, __rev0), 41)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -68211,15 +70729,15 @@ __ai __attribute__((target("v8.5a,neon"))) float32x4_t vrnd64zq_f32(float32x4_t #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd64z_f32(float32x2_t __p0) { float32x2_t __ret; - __ret = (float32x2_t) __builtin_neon_vrnd64z_f32((int8x8_t)__p0, 9); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd64z_f32(__builtin_bit_cast(int8x8_t, __p0), 9)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd64z_f32(float32x2_t __p0) { 
float32x2_t __ret; - float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float32x2_t) __builtin_neon_vrnd64z_f32((int8x8_t)__rev0, 9); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + __ret = __builtin_bit_cast(float32x2_t, __builtin_neon_vrnd64z_f32(__builtin_bit_cast(int8x8_t, __rev0), 9)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -68227,235 +70745,235 @@ __ai __attribute__((target("v8.5a,neon"))) float32x2_t vrnd64z_f32(float32x2_t _ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) { float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__p0, 42); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd64zq_f64(__builtin_bit_cast(int8x16_t, __p0), 42)); return __ret; } #else __ai __attribute__((target("v8.5a,neon"))) float64x2_t vrnd64zq_f64(float64x2_t __p0) { float64x2_t __ret; - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - __ret = (float64x2_t) __builtin_neon_vrnd64zq_f64((int8x16_t)__rev0, 42); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + __ret = __builtin_bit_cast(float64x2_t, __builtin_neon_vrnd64zq_f64(__builtin_bit_cast(int8x16_t, __rev0), 42)); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif __ai __attribute__((target("v8.5a,neon"))) float64x1_t vrnd64z_f64(float64x1_t __p0) { float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vrnd64z_f64((int8x8_t)__p0, 10); + __ret = __builtin_bit_cast(float64x1_t, __builtin_neon_vrnd64z_f64(__builtin_bit_cast(int8x8_t, __p0), 10)); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdotq_lane_f32(__p0_792, __p1_792, __p2_792, __p3_792) __extension__ ({ \ - float32x4_t __ret_792; \ - float32x4_t __s0_792 = __p0_792; \ - bfloat16x8_t __s1_792 = __p1_792; \ - bfloat16x4_t __s2_792 = __p2_792; \ - __ret_792 = vbfdotq_f32(__s0_792, __s1_792, __builtin_bit_cast(bfloat16x8_t, splatq_lane_f32(__builtin_bit_cast(float32x2_t, __s2_792), __p3_792))); \ - __ret_792; \ +#define vbfdotq_lane_f32(__p0_808, __p1_808, __p2_808, __p3_808) __extension__ ({ \ + float32x4_t __ret_808; \ + float32x4_t __s0_808 = __p0_808; \ + bfloat16x8_t __s1_808 = __p1_808; \ + bfloat16x4_t __s2_808 = __p2_808; \ + __ret_808 = vbfdotq_f32(__s0_808, __s1_808, __builtin_bit_cast(bfloat16x8_t, splatq_lane_f32(__builtin_bit_cast(float32x2_t, __s2_808), __p3_808))); \ + __ret_808; \ }) #else -#define vbfdotq_lane_f32(__p0_793, __p1_793, __p2_793, __p3_793) __extension__ ({ \ - float32x4_t __ret_793; \ - float32x4_t __s0_793 = __p0_793; \ - bfloat16x8_t __s1_793 = __p1_793; \ - bfloat16x4_t __s2_793 = __p2_793; \ - float32x4_t __rev0_793; __rev0_793 = __builtin_shufflevector(__s0_793, __s0_793, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_793; __rev1_793 = __builtin_shufflevector(__s1_793, __s1_793, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_793; __rev2_793 = __builtin_shufflevector(__s2_793, __s2_793, 3, 2, 1, 0); \ - __ret_793 = __noswap_vbfdotq_f32(__rev0_793, __rev1_793, __builtin_bit_cast(bfloat16x8_t, __noswap_splatq_lane_f32(__builtin_bit_cast(float32x2_t, __rev2_793), __p3_793))); \ - __ret_793 = __builtin_shufflevector(__ret_793, __ret_793, 3, 2, 1, 0); \ - __ret_793; \ +#define vbfdotq_lane_f32(__p0_809, 
__p1_809, __p2_809, __p3_809) __extension__ ({ \ + float32x4_t __ret_809; \ + float32x4_t __s0_809 = __p0_809; \ + bfloat16x8_t __s1_809 = __p1_809; \ + bfloat16x4_t __s2_809 = __p2_809; \ + float32x4_t __rev0_809; __rev0_809 = __builtin_shufflevector(__s0_809, __s0_809, __lane_reverse_128_32); \ + bfloat16x8_t __rev1_809; __rev1_809 = __builtin_shufflevector(__s1_809, __s1_809, __lane_reverse_128_16); \ + bfloat16x4_t __rev2_809; __rev2_809 = __builtin_shufflevector(__s2_809, __s2_809, __lane_reverse_64_16); \ + __ret_809 = __noswap_vbfdotq_f32(__rev0_809, __rev1_809, __builtin_bit_cast(bfloat16x8_t, __noswap_splatq_lane_f32(__builtin_bit_cast(float32x2_t, __rev2_809), __p3_809))); \ + __ret_809 = __builtin_shufflevector(__ret_809, __ret_809, __lane_reverse_128_32); \ + __ret_809; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdot_lane_f32(__p0_794, __p1_794, __p2_794, __p3_794) __extension__ ({ \ - float32x2_t __ret_794; \ - float32x2_t __s0_794 = __p0_794; \ - bfloat16x4_t __s1_794 = __p1_794; \ - bfloat16x4_t __s2_794 = __p2_794; \ - __ret_794 = vbfdot_f32(__s0_794, __s1_794, __builtin_bit_cast(bfloat16x4_t, splat_lane_f32(__builtin_bit_cast(float32x2_t, __s2_794), __p3_794))); \ - __ret_794; \ +#define vbfdot_lane_f32(__p0_810, __p1_810, __p2_810, __p3_810) __extension__ ({ \ + float32x2_t __ret_810; \ + float32x2_t __s0_810 = __p0_810; \ + bfloat16x4_t __s1_810 = __p1_810; \ + bfloat16x4_t __s2_810 = __p2_810; \ + __ret_810 = vbfdot_f32(__s0_810, __s1_810, __builtin_bit_cast(bfloat16x4_t, splat_lane_f32(__builtin_bit_cast(float32x2_t, __s2_810), __p3_810))); \ + __ret_810; \ }) #else -#define vbfdot_lane_f32(__p0_795, __p1_795, __p2_795, __p3_795) __extension__ ({ \ - float32x2_t __ret_795; \ - float32x2_t __s0_795 = __p0_795; \ - bfloat16x4_t __s1_795 = __p1_795; \ - bfloat16x4_t __s2_795 = __p2_795; \ - float32x2_t __rev0_795; __rev0_795 = __builtin_shufflevector(__s0_795, __s0_795, 1, 0); \ - bfloat16x4_t __rev1_795; __rev1_795 = __builtin_shufflevector(__s1_795, __s1_795, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_795; __rev2_795 = __builtin_shufflevector(__s2_795, __s2_795, 3, 2, 1, 0); \ - __ret_795 = __noswap_vbfdot_f32(__rev0_795, __rev1_795, __builtin_bit_cast(bfloat16x4_t, __noswap_splat_lane_f32(__builtin_bit_cast(float32x2_t, __rev2_795), __p3_795))); \ - __ret_795 = __builtin_shufflevector(__ret_795, __ret_795, 1, 0); \ - __ret_795; \ +#define vbfdot_lane_f32(__p0_811, __p1_811, __p2_811, __p3_811) __extension__ ({ \ + float32x2_t __ret_811; \ + float32x2_t __s0_811 = __p0_811; \ + bfloat16x4_t __s1_811 = __p1_811; \ + bfloat16x4_t __s2_811 = __p2_811; \ + float32x2_t __rev0_811; __rev0_811 = __builtin_shufflevector(__s0_811, __s0_811, __lane_reverse_64_32); \ + bfloat16x4_t __rev1_811; __rev1_811 = __builtin_shufflevector(__s1_811, __s1_811, __lane_reverse_64_16); \ + bfloat16x4_t __rev2_811; __rev2_811 = __builtin_shufflevector(__s2_811, __s2_811, __lane_reverse_64_16); \ + __ret_811 = __noswap_vbfdot_f32(__rev0_811, __rev1_811, __builtin_bit_cast(bfloat16x4_t, __noswap_splat_lane_f32(__builtin_bit_cast(float32x2_t, __rev2_811), __p3_811))); \ + __ret_811 = __builtin_shufflevector(__ret_811, __ret_811, __lane_reverse_64_32); \ + __ret_811; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdotq_laneq_f32(__p0_796, __p1_796, __p2_796, __p3_796) __extension__ ({ \ - float32x4_t __ret_796; \ - float32x4_t __s0_796 = __p0_796; \ - bfloat16x8_t __s1_796 = __p1_796; \ - bfloat16x8_t __s2_796 = __p2_796; \ - __ret_796 = vbfdotq_f32(__s0_796, __s1_796, 
__builtin_bit_cast(bfloat16x8_t, splatq_laneq_f32(__builtin_bit_cast(float32x4_t, __s2_796), __p3_796))); \ - __ret_796; \ +#define vbfdotq_laneq_f32(__p0_812, __p1_812, __p2_812, __p3_812) __extension__ ({ \ + float32x4_t __ret_812; \ + float32x4_t __s0_812 = __p0_812; \ + bfloat16x8_t __s1_812 = __p1_812; \ + bfloat16x8_t __s2_812 = __p2_812; \ + __ret_812 = vbfdotq_f32(__s0_812, __s1_812, __builtin_bit_cast(bfloat16x8_t, splatq_laneq_f32(__builtin_bit_cast(float32x4_t, __s2_812), __p3_812))); \ + __ret_812; \ }) #else -#define vbfdotq_laneq_f32(__p0_797, __p1_797, __p2_797, __p3_797) __extension__ ({ \ - float32x4_t __ret_797; \ - float32x4_t __s0_797 = __p0_797; \ - bfloat16x8_t __s1_797 = __p1_797; \ - bfloat16x8_t __s2_797 = __p2_797; \ - float32x4_t __rev0_797; __rev0_797 = __builtin_shufflevector(__s0_797, __s0_797, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_797; __rev1_797 = __builtin_shufflevector(__s1_797, __s1_797, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_797; __rev2_797 = __builtin_shufflevector(__s2_797, __s2_797, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_797 = __noswap_vbfdotq_f32(__rev0_797, __rev1_797, __builtin_bit_cast(bfloat16x8_t, __noswap_splatq_laneq_f32(__builtin_bit_cast(float32x4_t, __rev2_797), __p3_797))); \ - __ret_797 = __builtin_shufflevector(__ret_797, __ret_797, 3, 2, 1, 0); \ - __ret_797; \ +#define vbfdotq_laneq_f32(__p0_813, __p1_813, __p2_813, __p3_813) __extension__ ({ \ + float32x4_t __ret_813; \ + float32x4_t __s0_813 = __p0_813; \ + bfloat16x8_t __s1_813 = __p1_813; \ + bfloat16x8_t __s2_813 = __p2_813; \ + float32x4_t __rev0_813; __rev0_813 = __builtin_shufflevector(__s0_813, __s0_813, __lane_reverse_128_32); \ + bfloat16x8_t __rev1_813; __rev1_813 = __builtin_shufflevector(__s1_813, __s1_813, __lane_reverse_128_16); \ + bfloat16x8_t __rev2_813; __rev2_813 = __builtin_shufflevector(__s2_813, __s2_813, __lane_reverse_128_16); \ + __ret_813 = __noswap_vbfdotq_f32(__rev0_813, __rev1_813, __builtin_bit_cast(bfloat16x8_t, __noswap_splatq_laneq_f32(__builtin_bit_cast(float32x4_t, __rev2_813), __p3_813))); \ + __ret_813 = __builtin_shufflevector(__ret_813, __ret_813, __lane_reverse_128_32); \ + __ret_813; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdot_laneq_f32(__p0_798, __p1_798, __p2_798, __p3_798) __extension__ ({ \ - float32x2_t __ret_798; \ - float32x2_t __s0_798 = __p0_798; \ - bfloat16x4_t __s1_798 = __p1_798; \ - bfloat16x8_t __s2_798 = __p2_798; \ - __ret_798 = vbfdot_f32(__s0_798, __s1_798, __builtin_bit_cast(bfloat16x4_t, splat_laneq_f32(__builtin_bit_cast(float32x4_t, __s2_798), __p3_798))); \ - __ret_798; \ +#define vbfdot_laneq_f32(__p0_814, __p1_814, __p2_814, __p3_814) __extension__ ({ \ + float32x2_t __ret_814; \ + float32x2_t __s0_814 = __p0_814; \ + bfloat16x4_t __s1_814 = __p1_814; \ + bfloat16x8_t __s2_814 = __p2_814; \ + __ret_814 = vbfdot_f32(__s0_814, __s1_814, __builtin_bit_cast(bfloat16x4_t, splat_laneq_f32(__builtin_bit_cast(float32x4_t, __s2_814), __p3_814))); \ + __ret_814; \ }) #else -#define vbfdot_laneq_f32(__p0_799, __p1_799, __p2_799, __p3_799) __extension__ ({ \ - float32x2_t __ret_799; \ - float32x2_t __s0_799 = __p0_799; \ - bfloat16x4_t __s1_799 = __p1_799; \ - bfloat16x8_t __s2_799 = __p2_799; \ - float32x2_t __rev0_799; __rev0_799 = __builtin_shufflevector(__s0_799, __s0_799, 1, 0); \ - bfloat16x4_t __rev1_799; __rev1_799 = __builtin_shufflevector(__s1_799, __s1_799, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_799; __rev2_799 = __builtin_shufflevector(__s2_799, __s2_799, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_799 = 
__noswap_vbfdot_f32(__rev0_799, __rev1_799, __builtin_bit_cast(bfloat16x4_t, __noswap_splat_laneq_f32(__builtin_bit_cast(float32x4_t, __rev2_799), __p3_799))); \ - __ret_799 = __builtin_shufflevector(__ret_799, __ret_799, 1, 0); \ - __ret_799; \ +#define vbfdot_laneq_f32(__p0_815, __p1_815, __p2_815, __p3_815) __extension__ ({ \ + float32x2_t __ret_815; \ + float32x2_t __s0_815 = __p0_815; \ + bfloat16x4_t __s1_815 = __p1_815; \ + bfloat16x8_t __s2_815 = __p2_815; \ + float32x2_t __rev0_815; __rev0_815 = __builtin_shufflevector(__s0_815, __s0_815, __lane_reverse_64_32); \ + bfloat16x4_t __rev1_815; __rev1_815 = __builtin_shufflevector(__s1_815, __s1_815, __lane_reverse_64_16); \ + bfloat16x8_t __rev2_815; __rev2_815 = __builtin_shufflevector(__s2_815, __s2_815, __lane_reverse_128_16); \ + __ret_815 = __noswap_vbfdot_f32(__rev0_815, __rev1_815, __builtin_bit_cast(bfloat16x4_t, __noswap_splat_laneq_f32(__builtin_bit_cast(float32x4_t, __rev2_815), __p3_815))); \ + __ret_815 = __builtin_shufflevector(__ret_815, __ret_815, __lane_reverse_64_32); \ + __ret_815; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlalbq_lane_f32(__p0_800, __p1_800, __p2_800, __p3_800) __extension__ ({ \ - float32x4_t __ret_800; \ - float32x4_t __s0_800 = __p0_800; \ - bfloat16x8_t __s1_800 = __p1_800; \ - bfloat16x4_t __s2_800 = __p2_800; \ - __ret_800 = vbfmlalbq_f32(__s0_800, __s1_800, (bfloat16x8_t) {vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800)}); \ - __ret_800; \ +#define vbfmlalbq_lane_f32(__p0_816, __p1_816, __p2_816, __p3_816) __extension__ ({ \ + float32x4_t __ret_816; \ + float32x4_t __s0_816 = __p0_816; \ + bfloat16x8_t __s1_816 = __p1_816; \ + bfloat16x4_t __s2_816 = __p2_816; \ + __ret_816 = vbfmlalbq_f32(__s0_816, __s1_816, (bfloat16x8_t) {vget_lane_bf16(__s2_816, __p3_816), vget_lane_bf16(__s2_816, __p3_816), vget_lane_bf16(__s2_816, __p3_816), vget_lane_bf16(__s2_816, __p3_816), vget_lane_bf16(__s2_816, __p3_816), vget_lane_bf16(__s2_816, __p3_816), vget_lane_bf16(__s2_816, __p3_816), vget_lane_bf16(__s2_816, __p3_816)}); \ + __ret_816; \ }) #else -#define vbfmlalbq_lane_f32(__p0_801, __p1_801, __p2_801, __p3_801) __extension__ ({ \ - float32x4_t __ret_801; \ - float32x4_t __s0_801 = __p0_801; \ - bfloat16x8_t __s1_801 = __p1_801; \ - bfloat16x4_t __s2_801 = __p2_801; \ - float32x4_t __rev0_801; __rev0_801 = __builtin_shufflevector(__s0_801, __s0_801, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_801; __rev1_801 = __builtin_shufflevector(__s1_801, __s1_801, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_801; __rev2_801 = __builtin_shufflevector(__s2_801, __s2_801, 3, 2, 1, 0); \ - __ret_801 = __noswap_vbfmlalbq_f32(__rev0_801, __rev1_801, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801)}); \ - __ret_801 = __builtin_shufflevector(__ret_801, __ret_801, 3, 2, 1, 0); \ - __ret_801; \ +#define vbfmlalbq_lane_f32(__p0_817, __p1_817, __p2_817, __p3_817) __extension__ ({ \ + float32x4_t __ret_817; \ + float32x4_t __s0_817 = __p0_817; \ + 
bfloat16x8_t __s1_817 = __p1_817; \ + bfloat16x4_t __s2_817 = __p2_817; \ + float32x4_t __rev0_817; __rev0_817 = __builtin_shufflevector(__s0_817, __s0_817, __lane_reverse_128_32); \ + bfloat16x8_t __rev1_817; __rev1_817 = __builtin_shufflevector(__s1_817, __s1_817, __lane_reverse_128_16); \ + bfloat16x4_t __rev2_817; __rev2_817 = __builtin_shufflevector(__s2_817, __s2_817, __lane_reverse_64_16); \ + __ret_817 = __noswap_vbfmlalbq_f32(__rev0_817, __rev1_817, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_817, __p3_817), __noswap_vget_lane_bf16(__rev2_817, __p3_817), __noswap_vget_lane_bf16(__rev2_817, __p3_817), __noswap_vget_lane_bf16(__rev2_817, __p3_817), __noswap_vget_lane_bf16(__rev2_817, __p3_817), __noswap_vget_lane_bf16(__rev2_817, __p3_817), __noswap_vget_lane_bf16(__rev2_817, __p3_817), __noswap_vget_lane_bf16(__rev2_817, __p3_817)}); \ + __ret_817 = __builtin_shufflevector(__ret_817, __ret_817, __lane_reverse_128_32); \ + __ret_817; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlalbq_laneq_f32(__p0_802, __p1_802, __p2_802, __p3_802) __extension__ ({ \ - float32x4_t __ret_802; \ - float32x4_t __s0_802 = __p0_802; \ - bfloat16x8_t __s1_802 = __p1_802; \ - bfloat16x8_t __s2_802 = __p2_802; \ - __ret_802 = vbfmlalbq_f32(__s0_802, __s1_802, (bfloat16x8_t) {vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802)}); \ - __ret_802; \ +#define vbfmlalbq_laneq_f32(__p0_818, __p1_818, __p2_818, __p3_818) __extension__ ({ \ + float32x4_t __ret_818; \ + float32x4_t __s0_818 = __p0_818; \ + bfloat16x8_t __s1_818 = __p1_818; \ + bfloat16x8_t __s2_818 = __p2_818; \ + __ret_818 = vbfmlalbq_f32(__s0_818, __s1_818, (bfloat16x8_t) {vgetq_lane_bf16(__s2_818, __p3_818), vgetq_lane_bf16(__s2_818, __p3_818), vgetq_lane_bf16(__s2_818, __p3_818), vgetq_lane_bf16(__s2_818, __p3_818), vgetq_lane_bf16(__s2_818, __p3_818), vgetq_lane_bf16(__s2_818, __p3_818), vgetq_lane_bf16(__s2_818, __p3_818), vgetq_lane_bf16(__s2_818, __p3_818)}); \ + __ret_818; \ }) #else -#define vbfmlalbq_laneq_f32(__p0_803, __p1_803, __p2_803, __p3_803) __extension__ ({ \ - float32x4_t __ret_803; \ - float32x4_t __s0_803 = __p0_803; \ - bfloat16x8_t __s1_803 = __p1_803; \ - bfloat16x8_t __s2_803 = __p2_803; \ - float32x4_t __rev0_803; __rev0_803 = __builtin_shufflevector(__s0_803, __s0_803, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_803; __rev1_803 = __builtin_shufflevector(__s1_803, __s1_803, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_803; __rev2_803 = __builtin_shufflevector(__s2_803, __s2_803, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_803 = __noswap_vbfmlalbq_f32(__rev0_803, __rev1_803, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803)}); \ - __ret_803 = __builtin_shufflevector(__ret_803, __ret_803, 3, 2, 1, 0); \ - __ret_803; \ +#define vbfmlalbq_laneq_f32(__p0_819, __p1_819, __p2_819, __p3_819) __extension__ ({ \ + float32x4_t __ret_819; \ + float32x4_t __s0_819 = __p0_819; \ + bfloat16x8_t __s1_819 = __p1_819; \ + bfloat16x8_t __s2_819 = __p2_819; \ + 
float32x4_t __rev0_819; __rev0_819 = __builtin_shufflevector(__s0_819, __s0_819, __lane_reverse_128_32); \ + bfloat16x8_t __rev1_819; __rev1_819 = __builtin_shufflevector(__s1_819, __s1_819, __lane_reverse_128_16); \ + bfloat16x8_t __rev2_819; __rev2_819 = __builtin_shufflevector(__s2_819, __s2_819, __lane_reverse_128_16); \ + __ret_819 = __noswap_vbfmlalbq_f32(__rev0_819, __rev1_819, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_819, __p3_819), __noswap_vgetq_lane_bf16(__rev2_819, __p3_819), __noswap_vgetq_lane_bf16(__rev2_819, __p3_819), __noswap_vgetq_lane_bf16(__rev2_819, __p3_819), __noswap_vgetq_lane_bf16(__rev2_819, __p3_819), __noswap_vgetq_lane_bf16(__rev2_819, __p3_819), __noswap_vgetq_lane_bf16(__rev2_819, __p3_819), __noswap_vgetq_lane_bf16(__rev2_819, __p3_819)}); \ + __ret_819 = __builtin_shufflevector(__ret_819, __ret_819, __lane_reverse_128_32); \ + __ret_819; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlaltq_lane_f32(__p0_804, __p1_804, __p2_804, __p3_804) __extension__ ({ \ - float32x4_t __ret_804; \ - float32x4_t __s0_804 = __p0_804; \ - bfloat16x8_t __s1_804 = __p1_804; \ - bfloat16x4_t __s2_804 = __p2_804; \ - __ret_804 = vbfmlaltq_f32(__s0_804, __s1_804, (bfloat16x8_t) {vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804)}); \ - __ret_804; \ +#define vbfmlaltq_lane_f32(__p0_820, __p1_820, __p2_820, __p3_820) __extension__ ({ \ + float32x4_t __ret_820; \ + float32x4_t __s0_820 = __p0_820; \ + bfloat16x8_t __s1_820 = __p1_820; \ + bfloat16x4_t __s2_820 = __p2_820; \ + __ret_820 = vbfmlaltq_f32(__s0_820, __s1_820, (bfloat16x8_t) {vget_lane_bf16(__s2_820, __p3_820), vget_lane_bf16(__s2_820, __p3_820), vget_lane_bf16(__s2_820, __p3_820), vget_lane_bf16(__s2_820, __p3_820), vget_lane_bf16(__s2_820, __p3_820), vget_lane_bf16(__s2_820, __p3_820), vget_lane_bf16(__s2_820, __p3_820), vget_lane_bf16(__s2_820, __p3_820)}); \ + __ret_820; \ }) #else -#define vbfmlaltq_lane_f32(__p0_805, __p1_805, __p2_805, __p3_805) __extension__ ({ \ - float32x4_t __ret_805; \ - float32x4_t __s0_805 = __p0_805; \ - bfloat16x8_t __s1_805 = __p1_805; \ - bfloat16x4_t __s2_805 = __p2_805; \ - float32x4_t __rev0_805; __rev0_805 = __builtin_shufflevector(__s0_805, __s0_805, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_805; __rev1_805 = __builtin_shufflevector(__s1_805, __s1_805, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_805; __rev2_805 = __builtin_shufflevector(__s2_805, __s2_805, 3, 2, 1, 0); \ - __ret_805 = __noswap_vbfmlaltq_f32(__rev0_805, __rev1_805, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805)}); \ - __ret_805 = __builtin_shufflevector(__ret_805, __ret_805, 3, 2, 1, 0); \ - __ret_805; \ +#define vbfmlaltq_lane_f32(__p0_821, __p1_821, __p2_821, __p3_821) __extension__ ({ \ + float32x4_t __ret_821; \ + float32x4_t __s0_821 = __p0_821; \ + bfloat16x8_t __s1_821 = __p1_821; \ + bfloat16x4_t __s2_821 = __p2_821; \ + float32x4_t __rev0_821; __rev0_821 = __builtin_shufflevector(__s0_821, __s0_821, __lane_reverse_128_32); \ + 
bfloat16x8_t __rev1_821; __rev1_821 = __builtin_shufflevector(__s1_821, __s1_821, __lane_reverse_128_16); \ + bfloat16x4_t __rev2_821; __rev2_821 = __builtin_shufflevector(__s2_821, __s2_821, __lane_reverse_64_16); \ + __ret_821 = __noswap_vbfmlaltq_f32(__rev0_821, __rev1_821, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_821, __p3_821), __noswap_vget_lane_bf16(__rev2_821, __p3_821), __noswap_vget_lane_bf16(__rev2_821, __p3_821), __noswap_vget_lane_bf16(__rev2_821, __p3_821), __noswap_vget_lane_bf16(__rev2_821, __p3_821), __noswap_vget_lane_bf16(__rev2_821, __p3_821), __noswap_vget_lane_bf16(__rev2_821, __p3_821), __noswap_vget_lane_bf16(__rev2_821, __p3_821)}); \ + __ret_821 = __builtin_shufflevector(__ret_821, __ret_821, __lane_reverse_128_32); \ + __ret_821; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlaltq_laneq_f32(__p0_806, __p1_806, __p2_806, __p3_806) __extension__ ({ \ - float32x4_t __ret_806; \ - float32x4_t __s0_806 = __p0_806; \ - bfloat16x8_t __s1_806 = __p1_806; \ - bfloat16x8_t __s2_806 = __p2_806; \ - __ret_806 = vbfmlaltq_f32(__s0_806, __s1_806, (bfloat16x8_t) {vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806)}); \ - __ret_806; \ +#define vbfmlaltq_laneq_f32(__p0_822, __p1_822, __p2_822, __p3_822) __extension__ ({ \ + float32x4_t __ret_822; \ + float32x4_t __s0_822 = __p0_822; \ + bfloat16x8_t __s1_822 = __p1_822; \ + bfloat16x8_t __s2_822 = __p2_822; \ + __ret_822 = vbfmlaltq_f32(__s0_822, __s1_822, (bfloat16x8_t) {vgetq_lane_bf16(__s2_822, __p3_822), vgetq_lane_bf16(__s2_822, __p3_822), vgetq_lane_bf16(__s2_822, __p3_822), vgetq_lane_bf16(__s2_822, __p3_822), vgetq_lane_bf16(__s2_822, __p3_822), vgetq_lane_bf16(__s2_822, __p3_822), vgetq_lane_bf16(__s2_822, __p3_822), vgetq_lane_bf16(__s2_822, __p3_822)}); \ + __ret_822; \ }) #else -#define vbfmlaltq_laneq_f32(__p0_807, __p1_807, __p2_807, __p3_807) __extension__ ({ \ - float32x4_t __ret_807; \ - float32x4_t __s0_807 = __p0_807; \ - bfloat16x8_t __s1_807 = __p1_807; \ - bfloat16x8_t __s2_807 = __p2_807; \ - float32x4_t __rev0_807; __rev0_807 = __builtin_shufflevector(__s0_807, __s0_807, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_807; __rev1_807 = __builtin_shufflevector(__s1_807, __s1_807, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_807; __rev2_807 = __builtin_shufflevector(__s2_807, __s2_807, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_807 = __noswap_vbfmlaltq_f32(__rev0_807, __rev1_807, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807)}); \ - __ret_807 = __builtin_shufflevector(__ret_807, __ret_807, 3, 2, 1, 0); \ - __ret_807; \ +#define vbfmlaltq_laneq_f32(__p0_823, __p1_823, __p2_823, __p3_823) __extension__ ({ \ + float32x4_t __ret_823; \ + float32x4_t __s0_823 = __p0_823; \ + bfloat16x8_t __s1_823 = __p1_823; \ + bfloat16x8_t __s2_823 = __p2_823; \ + float32x4_t __rev0_823; __rev0_823 = __builtin_shufflevector(__s0_823, __s0_823, __lane_reverse_128_32); \ + bfloat16x8_t __rev1_823; __rev1_823 = __builtin_shufflevector(__s1_823, 
__s1_823, __lane_reverse_128_16); \ + bfloat16x8_t __rev2_823; __rev2_823 = __builtin_shufflevector(__s2_823, __s2_823, __lane_reverse_128_16); \ + __ret_823 = __noswap_vbfmlaltq_f32(__rev0_823, __rev1_823, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_823, __p3_823), __noswap_vgetq_lane_bf16(__rev2_823, __p3_823), __noswap_vgetq_lane_bf16(__rev2_823, __p3_823), __noswap_vgetq_lane_bf16(__rev2_823, __p3_823), __noswap_vgetq_lane_bf16(__rev2_823, __p3_823), __noswap_vgetq_lane_bf16(__rev2_823, __p3_823), __noswap_vgetq_lane_bf16(__rev2_823, __p3_823), __noswap_vgetq_lane_bf16(__rev2_823, __p3_823)}); \ + __ret_823 = __builtin_shufflevector(__ret_823, __ret_823, __lane_reverse_128_32); \ + __ret_823; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_808) { - float32x4_t __ret_808; - __ret_808 = __builtin_bit_cast(float32x4_t, vshll_n_u16(__builtin_bit_cast(uint16x4_t, __p0_808), 16)); - return __ret_808; +__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_824) { + float32x4_t __ret_824; + __ret_824 = __builtin_bit_cast(float32x4_t, vshll_n_u16(__builtin_bit_cast(uint16x4_t, __p0_824), 16)); + return __ret_824; } #else -__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_809) { - float32x4_t __ret_809; - bfloat16x4_t __rev0_809; __rev0_809 = __builtin_shufflevector(__p0_809, __p0_809, 3, 2, 1, 0); - __ret_809 = __builtin_bit_cast(float32x4_t, __noswap_vshll_n_u16(__builtin_bit_cast(uint16x4_t, __rev0_809), 16)); - __ret_809 = __builtin_shufflevector(__ret_809, __ret_809, 3, 2, 1, 0); - return __ret_809; +__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_825) { + float32x4_t __ret_825; + bfloat16x4_t __rev0_825; __rev0_825 = __builtin_shufflevector(__p0_825, __p0_825, __lane_reverse_64_16); + __ret_825 = __builtin_bit_cast(float32x4_t, __noswap_vshll_n_u16(__builtin_bit_cast(uint16x4_t, __rev0_825), 16)); + __ret_825 = __builtin_shufflevector(__ret_825, __ret_825, __lane_reverse_128_32); + return __ret_825; } -__ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vcvt_f32_bf16(bfloat16x4_t __p0_810) { - float32x4_t __ret_810; - __ret_810 = __builtin_bit_cast(float32x4_t, __noswap_vshll_n_u16(__builtin_bit_cast(uint16x4_t, __p0_810), 16)); - return __ret_810; +__ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vcvt_f32_bf16(bfloat16x4_t __p0_826) { + float32x4_t __ret_826; + __ret_826 = __builtin_bit_cast(float32x4_t, __noswap_vshll_n_u16(__builtin_bit_cast(uint16x4_t, __p0_826), 16)); + return __ret_826; } #endif @@ -68468,9 +70986,9 @@ __ai __attribute__((target("bf16,neon"))) float32x4_t vcvtq_high_f32_bf16(bfloat #else __ai __attribute__((target("bf16,neon"))) float32x4_t vcvtq_high_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; - bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __noswap_vcvt_f32_bf16(__noswap_vget_high_bf16(__rev0)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -68484,247 +71002,247 @@ __ai __attribute__((target("bf16,neon"))) float32x4_t vcvtq_low_f32_bf16(bfloat1 #else __ai __attribute__((target("bf16,neon"))) float32x4_t vcvtq_low_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; - bfloat16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); __ret = __noswap_vcvt_f32_bf16(__noswap_vget_low_bf16(__rev0)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vdotq_lane_u32(__p0_811, __p1_811, __p2_811, __p3_811) __extension__ ({ \ - uint32x4_t __ret_811; \ - uint32x4_t __s0_811 = __p0_811; \ - uint8x16_t __s1_811 = __p1_811; \ - uint8x8_t __s2_811 = __p2_811; \ - __ret_811 = vdotq_u32(__s0_811, __s1_811, __builtin_bit_cast(uint8x16_t, splatq_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_811), __p3_811))); \ - __ret_811; \ -}) -#else -#define vdotq_lane_u32(__p0_812, __p1_812, __p2_812, __p3_812) __extension__ ({ \ - uint32x4_t __ret_812; \ - uint32x4_t __s0_812 = __p0_812; \ - uint8x16_t __s1_812 = __p1_812; \ - uint8x8_t __s2_812 = __p2_812; \ - uint32x4_t __rev0_812; __rev0_812 = __builtin_shufflevector(__s0_812, __s0_812, 3, 2, 1, 0); \ - uint8x16_t __rev1_812; __rev1_812 = __builtin_shufflevector(__s1_812, __s1_812, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_812; __rev2_812 = __builtin_shufflevector(__s2_812, __s2_812, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_812 = __noswap_vdotq_u32(__rev0_812, __rev1_812, __builtin_bit_cast(uint8x16_t, __noswap_splatq_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_812), __p3_812))); \ - __ret_812 = __builtin_shufflevector(__ret_812, __ret_812, 3, 2, 1, 0); \ - __ret_812; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdotq_lane_s32(__p0_813, __p1_813, __p2_813, __p3_813) __extension__ ({ \ - int32x4_t __ret_813; \ - int32x4_t __s0_813 = __p0_813; \ - int8x16_t __s1_813 = __p1_813; \ - int8x8_t __s2_813 = __p2_813; \ - __ret_813 = vdotq_s32(__s0_813, __s1_813, __builtin_bit_cast(int8x16_t, splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_813), __p3_813))); \ - __ret_813; \ -}) -#else -#define vdotq_lane_s32(__p0_814, __p1_814, __p2_814, __p3_814) __extension__ ({ \ - int32x4_t __ret_814; \ - int32x4_t __s0_814 = __p0_814; \ - int8x16_t __s1_814 = __p1_814; \ - int8x8_t __s2_814 = __p2_814; \ - int32x4_t __rev0_814; __rev0_814 = __builtin_shufflevector(__s0_814, __s0_814, 3, 2, 1, 0); \ - int8x16_t __rev1_814; __rev1_814 = __builtin_shufflevector(__s1_814, __s1_814, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_814; __rev2_814 = __builtin_shufflevector(__s2_814, __s2_814, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_814 = __noswap_vdotq_s32(__rev0_814, __rev1_814, __builtin_bit_cast(int8x16_t, __noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_814), __p3_814))); \ - __ret_814 = __builtin_shufflevector(__ret_814, __ret_814, 3, 2, 1, 0); \ - __ret_814; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdot_lane_u32(__p0_815, __p1_815, __p2_815, __p3_815) __extension__ ({ \ - uint32x2_t __ret_815; \ - uint32x2_t __s0_815 = __p0_815; \ - uint8x8_t __s1_815 = __p1_815; \ - uint8x8_t __s2_815 = __p2_815; \ - __ret_815 = vdot_u32(__s0_815, __s1_815, __builtin_bit_cast(uint8x8_t, splat_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_815), __p3_815))); \ - __ret_815; \ -}) -#else -#define vdot_lane_u32(__p0_816, __p1_816, __p2_816, __p3_816) __extension__ ({ \ - uint32x2_t __ret_816; \ - uint32x2_t __s0_816 = __p0_816; \ - uint8x8_t __s1_816 = __p1_816; \ - uint8x8_t __s2_816 = __p2_816; \ - uint32x2_t __rev0_816; __rev0_816 = 
__builtin_shufflevector(__s0_816, __s0_816, 1, 0); \ - uint8x8_t __rev1_816; __rev1_816 = __builtin_shufflevector(__s1_816, __s1_816, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_816; __rev2_816 = __builtin_shufflevector(__s2_816, __s2_816, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_816 = __noswap_vdot_u32(__rev0_816, __rev1_816, __builtin_bit_cast(uint8x8_t, __noswap_splat_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_816), __p3_816))); \ - __ret_816 = __builtin_shufflevector(__ret_816, __ret_816, 1, 0); \ - __ret_816; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vdot_lane_s32(__p0_817, __p1_817, __p2_817, __p3_817) __extension__ ({ \ - int32x2_t __ret_817; \ - int32x2_t __s0_817 = __p0_817; \ - int8x8_t __s1_817 = __p1_817; \ - int8x8_t __s2_817 = __p2_817; \ - __ret_817 = vdot_s32(__s0_817, __s1_817, __builtin_bit_cast(int8x8_t, splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_817), __p3_817))); \ - __ret_817; \ -}) -#else -#define vdot_lane_s32(__p0_818, __p1_818, __p2_818, __p3_818) __extension__ ({ \ - int32x2_t __ret_818; \ - int32x2_t __s0_818 = __p0_818; \ - int8x8_t __s1_818 = __p1_818; \ - int8x8_t __s2_818 = __p2_818; \ - int32x2_t __rev0_818; __rev0_818 = __builtin_shufflevector(__s0_818, __s0_818, 1, 0); \ - int8x8_t __rev1_818; __rev1_818 = __builtin_shufflevector(__s1_818, __s1_818, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_818; __rev2_818 = __builtin_shufflevector(__s2_818, __s2_818, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_818 = __noswap_vdot_s32(__rev0_818, __rev1_818, __builtin_bit_cast(int8x8_t, __noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_818), __p3_818))); \ - __ret_818 = __builtin_shufflevector(__ret_818, __ret_818, 1, 0); \ - __ret_818; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmulq_lane_f16(__p0_819, __p1_819, __p2_819) __extension__ ({ \ - float16x8_t __ret_819; \ - float16x8_t __s0_819 = __p0_819; \ - float16x4_t __s1_819 = __p1_819; \ - __ret_819 = __s0_819 * splatq_lane_f16(__s1_819, __p2_819); \ - __ret_819; \ -}) -#else -#define vmulq_lane_f16(__p0_820, __p1_820, __p2_820) __extension__ ({ \ - float16x8_t __ret_820; \ - float16x8_t __s0_820 = __p0_820; \ - float16x4_t __s1_820 = __p1_820; \ - float16x8_t __rev0_820; __rev0_820 = __builtin_shufflevector(__s0_820, __s0_820, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev1_820; __rev1_820 = __builtin_shufflevector(__s1_820, __s1_820, 3, 2, 1, 0); \ - __ret_820 = __rev0_820 * __noswap_splatq_lane_f16(__rev1_820, __p2_820); \ - __ret_820 = __builtin_shufflevector(__ret_820, __ret_820, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_820; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmul_lane_f16(__p0_821, __p1_821, __p2_821) __extension__ ({ \ - float16x4_t __ret_821; \ - float16x4_t __s0_821 = __p0_821; \ - float16x4_t __s1_821 = __p1_821; \ - __ret_821 = __s0_821 * splat_lane_f16(__s1_821, __p2_821); \ - __ret_821; \ -}) -#else -#define vmul_lane_f16(__p0_822, __p1_822, __p2_822) __extension__ ({ \ - float16x4_t __ret_822; \ - float16x4_t __s0_822 = __p0_822; \ - float16x4_t __s1_822 = __p1_822; \ - float16x4_t __rev0_822; __rev0_822 = __builtin_shufflevector(__s0_822, __s0_822, 3, 2, 1, 0); \ - float16x4_t __rev1_822; __rev1_822 = __builtin_shufflevector(__s1_822, __s1_822, 3, 2, 1, 0); \ - __ret_822 = __rev0_822 * __noswap_splat_lane_f16(__rev1_822, __p2_822); \ - __ret_822 = __builtin_shufflevector(__ret_822, __ret_822, 3, 2, 1, 0); \ - __ret_822; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vsudotq_lane_s32(__p0_823, __p1_823, __p2_823, __p3_823) __extension__ ({ \ - int32x4_t 
__ret_823; \ - int32x4_t __s0_823 = __p0_823; \ - int8x16_t __s1_823 = __p1_823; \ - uint8x8_t __s2_823 = __p2_823; \ - __ret_823 = vusdotq_s32(__s0_823, (uint8x16_t)(splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_823), __p3_823)), __s1_823); \ - __ret_823; \ -}) -#else -#define vsudotq_lane_s32(__p0_824, __p1_824, __p2_824, __p3_824) __extension__ ({ \ - int32x4_t __ret_824; \ - int32x4_t __s0_824 = __p0_824; \ - int8x16_t __s1_824 = __p1_824; \ - uint8x8_t __s2_824 = __p2_824; \ - int32x4_t __rev0_824; __rev0_824 = __builtin_shufflevector(__s0_824, __s0_824, 3, 2, 1, 0); \ - int8x16_t __rev1_824; __rev1_824 = __builtin_shufflevector(__s1_824, __s1_824, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_824; __rev2_824 = __builtin_shufflevector(__s2_824, __s2_824, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_824 = __noswap_vusdotq_s32(__rev0_824, (uint8x16_t)(__noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_824), __p3_824)), __rev1_824); \ - __ret_824 = __builtin_shufflevector(__ret_824, __ret_824, 3, 2, 1, 0); \ - __ret_824; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vsudot_lane_s32(__p0_825, __p1_825, __p2_825, __p3_825) __extension__ ({ \ - int32x2_t __ret_825; \ - int32x2_t __s0_825 = __p0_825; \ - int8x8_t __s1_825 = __p1_825; \ - uint8x8_t __s2_825 = __p2_825; \ - __ret_825 = vusdot_s32(__s0_825, (uint8x8_t)(splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_825), __p3_825)), __s1_825); \ - __ret_825; \ -}) -#else -#define vsudot_lane_s32(__p0_826, __p1_826, __p2_826, __p3_826) __extension__ ({ \ - int32x2_t __ret_826; \ - int32x2_t __s0_826 = __p0_826; \ - int8x8_t __s1_826 = __p1_826; \ - uint8x8_t __s2_826 = __p2_826; \ - int32x2_t __rev0_826; __rev0_826 = __builtin_shufflevector(__s0_826, __s0_826, 1, 0); \ - int8x8_t __rev1_826; __rev1_826 = __builtin_shufflevector(__s1_826, __s1_826, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_826; __rev2_826 = __builtin_shufflevector(__s2_826, __s2_826, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_826 = __noswap_vusdot_s32(__rev0_826, (uint8x8_t)(__noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_826), __p3_826)), __rev1_826); \ - __ret_826 = __builtin_shufflevector(__ret_826, __ret_826, 1, 0); \ - __ret_826; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vusdotq_lane_s32(__p0_827, __p1_827, __p2_827, __p3_827) __extension__ ({ \ - int32x4_t __ret_827; \ - int32x4_t __s0_827 = __p0_827; \ +#define vdotq_lane_u32(__p0_827, __p1_827, __p2_827, __p3_827) __extension__ ({ \ + uint32x4_t __ret_827; \ + uint32x4_t __s0_827 = __p0_827; \ uint8x16_t __s1_827 = __p1_827; \ - int8x8_t __s2_827 = __p2_827; \ - __ret_827 = vusdotq_s32(__s0_827, __s1_827, (int8x16_t)(splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_827), __p3_827))); \ + uint8x8_t __s2_827 = __p2_827; \ + __ret_827 = vdotq_u32(__s0_827, __s1_827, __builtin_bit_cast(uint8x16_t, splatq_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_827), __p3_827))); \ __ret_827; \ }) #else -#define vusdotq_lane_s32(__p0_828, __p1_828, __p2_828, __p3_828) __extension__ ({ \ - int32x4_t __ret_828; \ - int32x4_t __s0_828 = __p0_828; \ +#define vdotq_lane_u32(__p0_828, __p1_828, __p2_828, __p3_828) __extension__ ({ \ + uint32x4_t __ret_828; \ + uint32x4_t __s0_828 = __p0_828; \ uint8x16_t __s1_828 = __p1_828; \ - int8x8_t __s2_828 = __p2_828; \ - int32x4_t __rev0_828; __rev0_828 = __builtin_shufflevector(__s0_828, __s0_828, 3, 2, 1, 0); \ - uint8x16_t __rev1_828; __rev1_828 = __builtin_shufflevector(__s1_828, __s1_828, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 
1, 0); \ - int8x8_t __rev2_828; __rev2_828 = __builtin_shufflevector(__s2_828, __s2_828, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_828 = __noswap_vusdotq_s32(__rev0_828, __rev1_828, (int8x16_t)(__noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_828), __p3_828))); \ - __ret_828 = __builtin_shufflevector(__ret_828, __ret_828, 3, 2, 1, 0); \ + uint8x8_t __s2_828 = __p2_828; \ + uint32x4_t __rev0_828; __rev0_828 = __builtin_shufflevector(__s0_828, __s0_828, __lane_reverse_128_32); \ + uint8x16_t __rev1_828; __rev1_828 = __builtin_shufflevector(__s1_828, __s1_828, __lane_reverse_128_8); \ + uint8x8_t __rev2_828; __rev2_828 = __builtin_shufflevector(__s2_828, __s2_828, __lane_reverse_64_8); \ + __ret_828 = __noswap_vdotq_u32(__rev0_828, __rev1_828, __builtin_bit_cast(uint8x16_t, __noswap_splatq_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_828), __p3_828))); \ + __ret_828 = __builtin_shufflevector(__ret_828, __ret_828, __lane_reverse_128_32); \ __ret_828; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vusdot_lane_s32(__p0_829, __p1_829, __p2_829, __p3_829) __extension__ ({ \ - int32x2_t __ret_829; \ - int32x2_t __s0_829 = __p0_829; \ - uint8x8_t __s1_829 = __p1_829; \ +#define vdotq_lane_s32(__p0_829, __p1_829, __p2_829, __p3_829) __extension__ ({ \ + int32x4_t __ret_829; \ + int32x4_t __s0_829 = __p0_829; \ + int8x16_t __s1_829 = __p1_829; \ int8x8_t __s2_829 = __p2_829; \ - __ret_829 = vusdot_s32(__s0_829, __s1_829, (int8x8_t)(splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_829), __p3_829))); \ + __ret_829 = vdotq_s32(__s0_829, __s1_829, __builtin_bit_cast(int8x16_t, splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_829), __p3_829))); \ __ret_829; \ }) #else -#define vusdot_lane_s32(__p0_830, __p1_830, __p2_830, __p3_830) __extension__ ({ \ - int32x2_t __ret_830; \ - int32x2_t __s0_830 = __p0_830; \ - uint8x8_t __s1_830 = __p1_830; \ +#define vdotq_lane_s32(__p0_830, __p1_830, __p2_830, __p3_830) __extension__ ({ \ + int32x4_t __ret_830; \ + int32x4_t __s0_830 = __p0_830; \ + int8x16_t __s1_830 = __p1_830; \ int8x8_t __s2_830 = __p2_830; \ - int32x2_t __rev0_830; __rev0_830 = __builtin_shufflevector(__s0_830, __s0_830, 1, 0); \ - uint8x8_t __rev1_830; __rev1_830 = __builtin_shufflevector(__s1_830, __s1_830, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_830; __rev2_830 = __builtin_shufflevector(__s2_830, __s2_830, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_830 = __noswap_vusdot_s32(__rev0_830, __rev1_830, (int8x8_t)(__noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_830), __p3_830))); \ - __ret_830 = __builtin_shufflevector(__ret_830, __ret_830, 1, 0); \ + int32x4_t __rev0_830; __rev0_830 = __builtin_shufflevector(__s0_830, __s0_830, __lane_reverse_128_32); \ + int8x16_t __rev1_830; __rev1_830 = __builtin_shufflevector(__s1_830, __s1_830, __lane_reverse_128_8); \ + int8x8_t __rev2_830; __rev2_830 = __builtin_shufflevector(__s2_830, __s2_830, __lane_reverse_64_8); \ + __ret_830 = __noswap_vdotq_s32(__rev0_830, __rev1_830, __builtin_bit_cast(int8x16_t, __noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_830), __p3_830))); \ + __ret_830 = __builtin_shufflevector(__ret_830, __ret_830, __lane_reverse_128_32); \ __ret_830; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vdot_lane_u32(__p0_831, __p1_831, __p2_831, __p3_831) __extension__ ({ \ + uint32x2_t __ret_831; \ + uint32x2_t __s0_831 = __p0_831; \ + uint8x8_t __s1_831 = __p1_831; \ + uint8x8_t __s2_831 = __p2_831; \ + __ret_831 = vdot_u32(__s0_831, __s1_831, __builtin_bit_cast(uint8x8_t, 
splat_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_831), __p3_831))); \ + __ret_831; \ +}) +#else +#define vdot_lane_u32(__p0_832, __p1_832, __p2_832, __p3_832) __extension__ ({ \ + uint32x2_t __ret_832; \ + uint32x2_t __s0_832 = __p0_832; \ + uint8x8_t __s1_832 = __p1_832; \ + uint8x8_t __s2_832 = __p2_832; \ + uint32x2_t __rev0_832; __rev0_832 = __builtin_shufflevector(__s0_832, __s0_832, __lane_reverse_64_32); \ + uint8x8_t __rev1_832; __rev1_832 = __builtin_shufflevector(__s1_832, __s1_832, __lane_reverse_64_8); \ + uint8x8_t __rev2_832; __rev2_832 = __builtin_shufflevector(__s2_832, __s2_832, __lane_reverse_64_8); \ + __ret_832 = __noswap_vdot_u32(__rev0_832, __rev1_832, __builtin_bit_cast(uint8x8_t, __noswap_splat_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_832), __p3_832))); \ + __ret_832 = __builtin_shufflevector(__ret_832, __ret_832, __lane_reverse_64_32); \ + __ret_832; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdot_lane_s32(__p0_833, __p1_833, __p2_833, __p3_833) __extension__ ({ \ + int32x2_t __ret_833; \ + int32x2_t __s0_833 = __p0_833; \ + int8x8_t __s1_833 = __p1_833; \ + int8x8_t __s2_833 = __p2_833; \ + __ret_833 = vdot_s32(__s0_833, __s1_833, __builtin_bit_cast(int8x8_t, splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_833), __p3_833))); \ + __ret_833; \ +}) +#else +#define vdot_lane_s32(__p0_834, __p1_834, __p2_834, __p3_834) __extension__ ({ \ + int32x2_t __ret_834; \ + int32x2_t __s0_834 = __p0_834; \ + int8x8_t __s1_834 = __p1_834; \ + int8x8_t __s2_834 = __p2_834; \ + int32x2_t __rev0_834; __rev0_834 = __builtin_shufflevector(__s0_834, __s0_834, __lane_reverse_64_32); \ + int8x8_t __rev1_834; __rev1_834 = __builtin_shufflevector(__s1_834, __s1_834, __lane_reverse_64_8); \ + int8x8_t __rev2_834; __rev2_834 = __builtin_shufflevector(__s2_834, __s2_834, __lane_reverse_64_8); \ + __ret_834 = __noswap_vdot_s32(__rev0_834, __rev1_834, __builtin_bit_cast(int8x8_t, __noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_834), __p3_834))); \ + __ret_834 = __builtin_shufflevector(__ret_834, __ret_834, __lane_reverse_64_32); \ + __ret_834; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulq_lane_f16(__p0_835, __p1_835, __p2_835) __extension__ ({ \ + float16x8_t __ret_835; \ + float16x8_t __s0_835 = __p0_835; \ + float16x4_t __s1_835 = __p1_835; \ + __ret_835 = __s0_835 * splatq_lane_f16(__s1_835, __p2_835); \ + __ret_835; \ +}) +#else +#define vmulq_lane_f16(__p0_836, __p1_836, __p2_836) __extension__ ({ \ + float16x8_t __ret_836; \ + float16x8_t __s0_836 = __p0_836; \ + float16x4_t __s1_836 = __p1_836; \ + float16x8_t __rev0_836; __rev0_836 = __builtin_shufflevector(__s0_836, __s0_836, __lane_reverse_128_16); \ + float16x4_t __rev1_836; __rev1_836 = __builtin_shufflevector(__s1_836, __s1_836, __lane_reverse_64_16); \ + __ret_836 = __rev0_836 * __noswap_splatq_lane_f16(__rev1_836, __p2_836); \ + __ret_836 = __builtin_shufflevector(__ret_836, __ret_836, __lane_reverse_128_16); \ + __ret_836; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmul_lane_f16(__p0_837, __p1_837, __p2_837) __extension__ ({ \ + float16x4_t __ret_837; \ + float16x4_t __s0_837 = __p0_837; \ + float16x4_t __s1_837 = __p1_837; \ + __ret_837 = __s0_837 * splat_lane_f16(__s1_837, __p2_837); \ + __ret_837; \ +}) +#else +#define vmul_lane_f16(__p0_838, __p1_838, __p2_838) __extension__ ({ \ + float16x4_t __ret_838; \ + float16x4_t __s0_838 = __p0_838; \ + float16x4_t __s1_838 = __p1_838; \ + float16x4_t __rev0_838; __rev0_838 = __builtin_shufflevector(__s0_838, __s0_838, 
__lane_reverse_64_16); \ + float16x4_t __rev1_838; __rev1_838 = __builtin_shufflevector(__s1_838, __s1_838, __lane_reverse_64_16); \ + __ret_838 = __rev0_838 * __noswap_splat_lane_f16(__rev1_838, __p2_838); \ + __ret_838 = __builtin_shufflevector(__ret_838, __ret_838, __lane_reverse_64_16); \ + __ret_838; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vsudotq_lane_s32(__p0_839, __p1_839, __p2_839, __p3_839) __extension__ ({ \ + int32x4_t __ret_839; \ + int32x4_t __s0_839 = __p0_839; \ + int8x16_t __s1_839 = __p1_839; \ + uint8x8_t __s2_839 = __p2_839; \ + __ret_839 = vusdotq_s32(__s0_839, __builtin_bit_cast(uint8x16_t, splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_839), __p3_839)), __s1_839); \ + __ret_839; \ +}) +#else +#define vsudotq_lane_s32(__p0_840, __p1_840, __p2_840, __p3_840) __extension__ ({ \ + int32x4_t __ret_840; \ + int32x4_t __s0_840 = __p0_840; \ + int8x16_t __s1_840 = __p1_840; \ + uint8x8_t __s2_840 = __p2_840; \ + int32x4_t __rev0_840; __rev0_840 = __builtin_shufflevector(__s0_840, __s0_840, __lane_reverse_128_32); \ + int8x16_t __rev1_840; __rev1_840 = __builtin_shufflevector(__s1_840, __s1_840, __lane_reverse_128_8); \ + uint8x8_t __rev2_840; __rev2_840 = __builtin_shufflevector(__s2_840, __s2_840, __lane_reverse_64_8); \ + __ret_840 = __noswap_vusdotq_s32(__rev0_840, __builtin_bit_cast(uint8x16_t, __noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_840), __p3_840)), __rev1_840); \ + __ret_840 = __builtin_shufflevector(__ret_840, __ret_840, __lane_reverse_128_32); \ + __ret_840; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vsudot_lane_s32(__p0_841, __p1_841, __p2_841, __p3_841) __extension__ ({ \ + int32x2_t __ret_841; \ + int32x2_t __s0_841 = __p0_841; \ + int8x8_t __s1_841 = __p1_841; \ + uint8x8_t __s2_841 = __p2_841; \ + __ret_841 = vusdot_s32(__s0_841, __builtin_bit_cast(uint8x8_t, splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_841), __p3_841)), __s1_841); \ + __ret_841; \ +}) +#else +#define vsudot_lane_s32(__p0_842, __p1_842, __p2_842, __p3_842) __extension__ ({ \ + int32x2_t __ret_842; \ + int32x2_t __s0_842 = __p0_842; \ + int8x8_t __s1_842 = __p1_842; \ + uint8x8_t __s2_842 = __p2_842; \ + int32x2_t __rev0_842; __rev0_842 = __builtin_shufflevector(__s0_842, __s0_842, __lane_reverse_64_32); \ + int8x8_t __rev1_842; __rev1_842 = __builtin_shufflevector(__s1_842, __s1_842, __lane_reverse_64_8); \ + uint8x8_t __rev2_842; __rev2_842 = __builtin_shufflevector(__s2_842, __s2_842, __lane_reverse_64_8); \ + __ret_842 = __noswap_vusdot_s32(__rev0_842, __builtin_bit_cast(uint8x8_t, __noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_842), __p3_842)), __rev1_842); \ + __ret_842 = __builtin_shufflevector(__ret_842, __ret_842, __lane_reverse_64_32); \ + __ret_842; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vusdotq_lane_s32(__p0_843, __p1_843, __p2_843, __p3_843) __extension__ ({ \ + int32x4_t __ret_843; \ + int32x4_t __s0_843 = __p0_843; \ + uint8x16_t __s1_843 = __p1_843; \ + int8x8_t __s2_843 = __p2_843; \ + __ret_843 = vusdotq_s32(__s0_843, __s1_843, __builtin_bit_cast(int8x16_t, splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_843), __p3_843))); \ + __ret_843; \ +}) +#else +#define vusdotq_lane_s32(__p0_844, __p1_844, __p2_844, __p3_844) __extension__ ({ \ + int32x4_t __ret_844; \ + int32x4_t __s0_844 = __p0_844; \ + uint8x16_t __s1_844 = __p1_844; \ + int8x8_t __s2_844 = __p2_844; \ + int32x4_t __rev0_844; __rev0_844 = __builtin_shufflevector(__s0_844, __s0_844, __lane_reverse_128_32); \ + uint8x16_t __rev1_844; 
__rev1_844 = __builtin_shufflevector(__s1_844, __s1_844, __lane_reverse_128_8); \ + int8x8_t __rev2_844; __rev2_844 = __builtin_shufflevector(__s2_844, __s2_844, __lane_reverse_64_8); \ + __ret_844 = __noswap_vusdotq_s32(__rev0_844, __rev1_844, __builtin_bit_cast(int8x16_t, __noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_844), __p3_844))); \ + __ret_844 = __builtin_shufflevector(__ret_844, __ret_844, __lane_reverse_128_32); \ + __ret_844; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vusdot_lane_s32(__p0_845, __p1_845, __p2_845, __p3_845) __extension__ ({ \ + int32x2_t __ret_845; \ + int32x2_t __s0_845 = __p0_845; \ + uint8x8_t __s1_845 = __p1_845; \ + int8x8_t __s2_845 = __p2_845; \ + __ret_845 = vusdot_s32(__s0_845, __s1_845, __builtin_bit_cast(int8x8_t, splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_845), __p3_845))); \ + __ret_845; \ +}) +#else +#define vusdot_lane_s32(__p0_846, __p1_846, __p2_846, __p3_846) __extension__ ({ \ + int32x2_t __ret_846; \ + int32x2_t __s0_846 = __p0_846; \ + uint8x8_t __s1_846 = __p1_846; \ + int8x8_t __s2_846 = __p2_846; \ + int32x2_t __rev0_846; __rev0_846 = __builtin_shufflevector(__s0_846, __s0_846, __lane_reverse_64_32); \ + uint8x8_t __rev1_846; __rev1_846 = __builtin_shufflevector(__s1_846, __s1_846, __lane_reverse_64_8); \ + int8x8_t __rev2_846; __rev2_846 = __builtin_shufflevector(__s2_846, __s2_846, __lane_reverse_64_8); \ + __ret_846 = __noswap_vusdot_s32(__rev0_846, __rev1_846, __builtin_bit_cast(int8x8_t, __noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_846), __p3_846))); \ + __ret_846 = __builtin_shufflevector(__ret_846, __ret_846, __lane_reverse_64_32); \ + __ret_846; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint8x16_t vabaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; @@ -68734,11 +71252,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vabaq_u8(uint8x16_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint8x16_t vabaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __rev0 + __noswap_vabdq_u8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -68752,11 +71270,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vabaq_u32(uint32x4_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint32x4_t vabaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t 
__rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 + __noswap_vabdq_u32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -68770,11 +71288,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vabaq_u16(uint16x8_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint16x8_t vabaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __rev0 + __noswap_vabdq_u16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -68788,11 +71306,11 @@ __ai __attribute__((target("neon"))) int8x16_t vabaq_s8(int8x16_t __p0, int8x16_ #else __ai __attribute__((target("neon"))) int8x16_t vabaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __rev0 + __noswap_vabdq_s8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_8); return __ret; } #endif @@ -68806,11 +71324,11 @@ __ai __attribute__((target("neon"))) int32x4_t vabaq_s32(int32x4_t __p0, int32x4 #else __ai __attribute__((target("neon"))) int32x4_t vabaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __rev0 + __noswap_vabdq_s32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -68824,11 +71342,11 @@ __ai __attribute__((target("neon"))) int16x8_t vabaq_s16(int16x8_t __p0, int16x8 #else 
__ai __attribute__((target("neon"))) int16x8_t vabaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __rev0 + __noswap_vabdq_s16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -68842,11 +71360,11 @@ __ai __attribute__((target("neon"))) uint8x8_t vaba_u8(uint8x8_t __p0, uint8x8_t #else __ai __attribute__((target("neon"))) uint8x8_t vaba_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __noswap_vabd_u8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -68860,11 +71378,11 @@ __ai __attribute__((target("neon"))) uint32x2_t vaba_u32(uint32x2_t __p0, uint32 #else __ai __attribute__((target("neon"))) uint32x2_t vaba_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __noswap_vabd_u32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -68878,11 +71396,11 @@ __ai __attribute__((target("neon"))) uint16x4_t vaba_u16(uint16x4_t __p0, uint16 #else __ai __attribute__((target("neon"))) uint16x4_t vaba_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + 
__noswap_vabd_u16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -68896,11 +71414,11 @@ __ai __attribute__((target("neon"))) int8x8_t vaba_s8(int8x8_t __p0, int8x8_t __ #else __ai __attribute__((target("neon"))) int8x8_t vaba_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __noswap_vabd_s8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_8); return __ret; } #endif @@ -68914,11 +71432,11 @@ __ai __attribute__((target("neon"))) int32x2_t vaba_s32(int32x2_t __p0, int32x2_ #else __ai __attribute__((target("neon"))) int32x2_t vaba_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __noswap_vabd_s32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_32); return __ret; } #endif @@ -68932,11 +71450,11 @@ __ai __attribute__((target("neon"))) int16x4_t vaba_s16(int16x4_t __p0, int16x4_ #else __ai __attribute__((target("neon"))) int16x4_t vaba_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + __noswap_vabd_s16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_64_16); return __ret; } #endif @@ -68944,21 +71462,21 @@ __ai __attribute__((target("neon"))) int16x4_t vaba_s16(int16x4_t __p0, int16x4_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vabdl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(vmovl_u8((uint8x8_t)(vabd_u8(__p0, __p1)))); + __ret = __builtin_bit_cast(uint16x8_t, vmovl_u8(__builtin_bit_cast(uint8x8_t, vabd_u8(__p0, __p1)))); return __ret; } #else __ai __attribute__((target("neon"))) uint16x8_t vabdl_u8(uint8x8_t __p0, uint8x8_t __p1) { 
uint16x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (uint16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_u8(__rev0, __rev1)))); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(uint16x8_t, __noswap_vmovl_u8(__builtin_bit_cast(uint8x8_t, __noswap_vabd_u8(__rev0, __rev1)))); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vabdl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - __ret = (uint16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_u8(__p0, __p1)))); + __ret = __builtin_bit_cast(uint16x8_t, __noswap_vmovl_u8(__builtin_bit_cast(uint8x8_t, __noswap_vabd_u8(__p0, __p1)))); return __ret; } #endif @@ -68966,21 +71484,21 @@ __ai __attribute__((target("neon"))) uint16x8_t __noswap_vabdl_u8(uint8x8_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint64x2_t vabdl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(vmovl_u32((uint32x2_t)(vabd_u32(__p0, __p1)))); + __ret = __builtin_bit_cast(uint64x2_t, vmovl_u32(__builtin_bit_cast(uint32x2_t, vabd_u32(__p0, __p1)))); return __ret; } #else __ai __attribute__((target("neon"))) uint64x2_t vabdl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (uint64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_u32(__rev0, __rev1)))); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(uint64x2_t, __noswap_vmovl_u32(__builtin_bit_cast(uint32x2_t, __noswap_vabd_u32(__rev0, __rev1)))); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vabdl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - __ret = (uint64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_u32(__p0, __p1)))); + __ret = __builtin_bit_cast(uint64x2_t, __noswap_vmovl_u32(__builtin_bit_cast(uint32x2_t, __noswap_vabd_u32(__p0, __p1)))); return __ret; } #endif @@ -68988,21 +71506,21 @@ __ai __attribute__((target("neon"))) uint64x2_t __noswap_vabdl_u32(uint32x2_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint32x4_t vabdl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(vmovl_u16((uint16x4_t)(vabd_u16(__p0, __p1)))); + __ret = __builtin_bit_cast(uint32x4_t, vmovl_u16(__builtin_bit_cast(uint16x4_t, vabd_u16(__p0, __p1)))); return __ret; } #else __ai __attribute__((target("neon"))) uint32x4_t vabdl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (uint32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_u16(__rev0, __rev1)))); - __ret = __builtin_shufflevector(__ret, 
__ret, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(uint32x4_t, __noswap_vmovl_u16(__builtin_bit_cast(uint16x4_t, __noswap_vabd_u16(__rev0, __rev1)))); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vabdl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - __ret = (uint32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_u16(__p0, __p1)))); + __ret = __builtin_bit_cast(uint32x4_t, __noswap_vmovl_u16(__builtin_bit_cast(uint16x4_t, __noswap_vabd_u16(__p0, __p1)))); return __ret; } #endif @@ -69010,21 +71528,21 @@ __ai __attribute__((target("neon"))) uint32x4_t __noswap_vabdl_u16(uint16x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int16x8_t vabdl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t)(vmovl_u8((uint8x8_t)(vabd_s8(__p0, __p1)))); + __ret = __builtin_bit_cast(int16x8_t, vmovl_u8(__builtin_bit_cast(uint8x8_t, vabd_s8(__p0, __p1)))); return __ret; } #else __ai __attribute__((target("neon"))) int16x8_t vabdl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - __ret = (int16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_s8(__rev0, __rev1)))); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + __ret = __builtin_bit_cast(int16x8_t, __noswap_vmovl_u8(__builtin_bit_cast(uint8x8_t, __noswap_vabd_s8(__rev0, __rev1)))); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vabdl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - __ret = (int16x8_t)(__noswap_vmovl_u8((uint8x8_t)(__noswap_vabd_s8(__p0, __p1)))); + __ret = __builtin_bit_cast(int16x8_t, __noswap_vmovl_u8(__builtin_bit_cast(uint8x8_t, __noswap_vabd_s8(__p0, __p1)))); return __ret; } #endif @@ -69032,21 +71550,21 @@ __ai __attribute__((target("neon"))) int16x8_t __noswap_vabdl_s8(int8x8_t __p0, #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int64x2_t vabdl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t)(vmovl_u32((uint32x2_t)(vabd_s32(__p0, __p1)))); + __ret = __builtin_bit_cast(int64x2_t, vmovl_u32(__builtin_bit_cast(uint32x2_t, vabd_s32(__p0, __p1)))); return __ret; } #else __ai __attribute__((target("neon"))) int64x2_t vabdl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = (int64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_s32(__rev0, __rev1)))); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + __ret = __builtin_bit_cast(int64x2_t, __noswap_vmovl_u32(__builtin_bit_cast(uint32x2_t, __noswap_vabd_s32(__rev0, __rev1)))); + __ret = 
__builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vabdl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - __ret = (int64x2_t)(__noswap_vmovl_u32((uint32x2_t)(__noswap_vabd_s32(__p0, __p1)))); + __ret = __builtin_bit_cast(int64x2_t, __noswap_vmovl_u32(__builtin_bit_cast(uint32x2_t, __noswap_vabd_s32(__p0, __p1)))); return __ret; } #endif @@ -69054,21 +71572,21 @@ __ai __attribute__((target("neon"))) int64x2_t __noswap_vabdl_s32(int32x2_t __p0 #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) int32x4_t vabdl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t)(vmovl_u16((uint16x4_t)(vabd_s16(__p0, __p1)))); + __ret = __builtin_bit_cast(int32x4_t, vmovl_u16(__builtin_bit_cast(uint16x4_t, vabd_s16(__p0, __p1)))); return __ret; } #else __ai __attribute__((target("neon"))) int32x4_t vabdl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - __ret = (int32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_s16(__rev0, __rev1)))); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + __ret = __builtin_bit_cast(int32x4_t, __noswap_vmovl_u16(__builtin_bit_cast(uint16x4_t, __noswap_vabd_s16(__rev0, __rev1)))); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vabdl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - __ret = (int32x4_t)(__noswap_vmovl_u16((uint16x4_t)(__noswap_vabd_s16(__p0, __p1)))); + __ret = __builtin_bit_cast(int32x4_t, __noswap_vmovl_u16(__builtin_bit_cast(uint16x4_t, __noswap_vabd_s16(__p0, __p1)))); return __ret; } #endif @@ -69082,10 +71600,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vaddl_u8(uint8x8_t __p0, uint8x8 #else __ai __attribute__((target("neon"))) uint16x8_t vaddl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __noswap_vmovl_u8(__rev0) + __noswap_vmovl_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -69099,10 +71617,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vaddl_u32(uint32x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint64x2_t vaddl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vmovl_u32(__rev0) + __noswap_vmovl_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, 
__lane_reverse_128_64); return __ret; } #endif @@ -69116,10 +71634,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vaddl_u16(uint16x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint32x4_t vaddl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __noswap_vmovl_u16(__rev0) + __noswap_vmovl_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -69133,10 +71651,10 @@ __ai __attribute__((target("neon"))) int16x8_t vaddl_s8(int8x8_t __p0, int8x8_t #else __ai __attribute__((target("neon"))) int16x8_t vaddl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_8); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __noswap_vmovl_s8(__rev0) + __noswap_vmovl_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -69150,10 +71668,10 @@ __ai __attribute__((target("neon"))) int64x2_t vaddl_s32(int32x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int64x2_t vaddl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __noswap_vmovl_s32(__rev0) + __noswap_vmovl_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -69167,10 +71685,10 @@ __ai __attribute__((target("neon"))) int32x4_t vaddl_s16(int16x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int32x4_t vaddl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_16); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __noswap_vmovl_s16(__rev0) + __noswap_vmovl_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -69184,10 +71702,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vaddw_u8(uint16x8_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint16x8_t vaddw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 
1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 + __noswap_vmovl_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -69201,10 +71719,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vaddw_u32(uint64x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint64x2_t vaddw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __noswap_vmovl_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -69218,10 +71736,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vaddw_u16(uint32x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint32x4_t vaddw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __noswap_vmovl_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -69235,10 +71753,10 @@ __ai __attribute__((target("neon"))) int16x8_t vaddw_s8(int16x8_t __p0, int8x8_t #else __ai __attribute__((target("neon"))) int16x8_t vaddw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); __ret = __rev0 + __noswap_vmovl_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -69252,10 +71770,10 @@ __ai __attribute__((target("neon"))) int64x2_t vaddw_s32(int64x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int64x2_t vaddw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __noswap_vmovl_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -69269,57 +71787,57 @@ __ai __attribute__((target("neon"))) int32x4_t vaddw_s16(int32x4_t __p0, int16x4 #else __ai 
__attribute__((target("neon"))) int32x4_t vaddw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __noswap_vmovl_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vget_lane_f16(__p0_831, __p1_831) __extension__ ({ \ - float16_t __ret_831; \ - float16x4_t __s0_831 = __p0_831; \ - __ret_831 = __builtin_bit_cast(float16_t, vget_lane_s16(__builtin_bit_cast(int16x4_t, __s0_831), __p1_831)); \ - __ret_831; \ +#define vget_lane_f16(__p0_847, __p1_847) __extension__ ({ \ + float16_t __ret_847; \ + float16x4_t __s0_847 = __p0_847; \ + __ret_847 = __builtin_bit_cast(float16_t, vget_lane_s16(__builtin_bit_cast(int16x4_t, __s0_847), __p1_847)); \ + __ret_847; \ }) #else -#define vget_lane_f16(__p0_832, __p1_832) __extension__ ({ \ - float16_t __ret_832; \ - float16x4_t __s0_832 = __p0_832; \ - float16x4_t __rev0_832; __rev0_832 = __builtin_shufflevector(__s0_832, __s0_832, 3, 2, 1, 0); \ - __ret_832 = __builtin_bit_cast(float16_t, __noswap_vget_lane_s16(__builtin_bit_cast(int16x4_t, __rev0_832), __p1_832)); \ - __ret_832; \ +#define vget_lane_f16(__p0_848, __p1_848) __extension__ ({ \ + float16_t __ret_848; \ + float16x4_t __s0_848 = __p0_848; \ + float16x4_t __rev0_848; __rev0_848 = __builtin_shufflevector(__s0_848, __s0_848, __lane_reverse_64_16); \ + __ret_848 = __builtin_bit_cast(float16_t, __noswap_vget_lane_s16(__builtin_bit_cast(int16x4_t, __rev0_848), __p1_848)); \ + __ret_848; \ }) -#define __noswap_vget_lane_f16(__p0_833, __p1_833) __extension__ ({ \ - float16_t __ret_833; \ - float16x4_t __s0_833 = __p0_833; \ - __ret_833 = __builtin_bit_cast(float16_t, __noswap_vget_lane_s16(__builtin_bit_cast(int16x4_t, __s0_833), __p1_833)); \ - __ret_833; \ +#define __noswap_vget_lane_f16(__p0_849, __p1_849) __extension__ ({ \ + float16_t __ret_849; \ + float16x4_t __s0_849 = __p0_849; \ + __ret_849 = __builtin_bit_cast(float16_t, __noswap_vget_lane_s16(__builtin_bit_cast(int16x4_t, __s0_849), __p1_849)); \ + __ret_849; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vgetq_lane_f16(__p0_834, __p1_834) __extension__ ({ \ - float16_t __ret_834; \ - float16x8_t __s0_834 = __p0_834; \ - __ret_834 = __builtin_bit_cast(float16_t, vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __s0_834), __p1_834)); \ - __ret_834; \ +#define vgetq_lane_f16(__p0_850, __p1_850) __extension__ ({ \ + float16_t __ret_850; \ + float16x8_t __s0_850 = __p0_850; \ + __ret_850 = __builtin_bit_cast(float16_t, vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __s0_850), __p1_850)); \ + __ret_850; \ }) #else -#define vgetq_lane_f16(__p0_835, __p1_835) __extension__ ({ \ - float16_t __ret_835; \ - float16x8_t __s0_835 = __p0_835; \ - float16x8_t __rev0_835; __rev0_835 = __builtin_shufflevector(__s0_835, __s0_835, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_835 = __builtin_bit_cast(float16_t, __noswap_vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __rev0_835), __p1_835)); \ - __ret_835; \ +#define vgetq_lane_f16(__p0_851, __p1_851) __extension__ ({ \ + float16_t __ret_851; \ + float16x8_t __s0_851 = __p0_851; \ + float16x8_t __rev0_851; __rev0_851 = 
__builtin_shufflevector(__s0_851, __s0_851, __lane_reverse_128_16); \ + __ret_851 = __builtin_bit_cast(float16_t, __noswap_vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __rev0_851), __p1_851)); \ + __ret_851; \ }) -#define __noswap_vgetq_lane_f16(__p0_836, __p1_836) __extension__ ({ \ - float16_t __ret_836; \ - float16x8_t __s0_836 = __p0_836; \ - __ret_836 = __builtin_bit_cast(float16_t, __noswap_vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __s0_836), __p1_836)); \ - __ret_836; \ +#define __noswap_vgetq_lane_f16(__p0_852, __p1_852) __extension__ ({ \ + float16_t __ret_852; \ + float16x8_t __s0_852 = __p0_852; \ + __ret_852 = __builtin_bit_cast(float16_t, __noswap_vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __s0_852), __p1_852)); \ + __ret_852; \ }) #endif @@ -69332,11 +71850,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlal_u8(uint16x8_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint16x8_t vmlal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __noswap_vmull_u8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vmlal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { @@ -69355,11 +71873,11 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlal_u32(uint64x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint64x2_t vmlal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __noswap_vmull_u32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmlal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { @@ -69378,11 +71896,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlal_u16(uint32x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint32x4_t vmlal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_64_16); + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + __noswap_vmull_u16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmlal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { @@ -69401,11 +71919,11 @@ __ai __attribute__((target("neon"))) int16x8_t vmlal_s8(int16x8_t __p0, int8x8_t #else __ai __attribute__((target("neon"))) int16x8_t vmlal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __noswap_vmull_s8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vmlal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { @@ -69424,11 +71942,11 @@ __ai __attribute__((target("neon"))) int64x2_t vmlal_s32(int64x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int64x2_t vmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __noswap_vmull_s32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { @@ -69447,11 +71965,11 @@ __ai __attribute__((target("neon"))) int32x4_t vmlal_s16(int32x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int32x4_t vmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + __noswap_vmull_s16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t 
__noswap_vmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { @@ -69462,98 +71980,98 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlal_s16(int32x4_t __p0 #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_u32(__p0_837, __p1_837, __p2_837, __p3_837) __extension__ ({ \ - uint64x2_t __ret_837; \ - uint64x2_t __s0_837 = __p0_837; \ - uint32x2_t __s1_837 = __p1_837; \ - uint32x2_t __s2_837 = __p2_837; \ - __ret_837 = __s0_837 + vmull_u32(__s1_837, splat_lane_u32(__s2_837, __p3_837)); \ - __ret_837; \ +#define vmlal_lane_u32(__p0_853, __p1_853, __p2_853, __p3_853) __extension__ ({ \ + uint64x2_t __ret_853; \ + uint64x2_t __s0_853 = __p0_853; \ + uint32x2_t __s1_853 = __p1_853; \ + uint32x2_t __s2_853 = __p2_853; \ + __ret_853 = __s0_853 + vmull_u32(__s1_853, splat_lane_u32(__s2_853, __p3_853)); \ + __ret_853; \ }) #else -#define vmlal_lane_u32(__p0_838, __p1_838, __p2_838, __p3_838) __extension__ ({ \ - uint64x2_t __ret_838; \ - uint64x2_t __s0_838 = __p0_838; \ - uint32x2_t __s1_838 = __p1_838; \ - uint32x2_t __s2_838 = __p2_838; \ - uint64x2_t __rev0_838; __rev0_838 = __builtin_shufflevector(__s0_838, __s0_838, 1, 0); \ - uint32x2_t __rev1_838; __rev1_838 = __builtin_shufflevector(__s1_838, __s1_838, 1, 0); \ - uint32x2_t __rev2_838; __rev2_838 = __builtin_shufflevector(__s2_838, __s2_838, 1, 0); \ - __ret_838 = __rev0_838 + __noswap_vmull_u32(__rev1_838, __noswap_splat_lane_u32(__rev2_838, __p3_838)); \ - __ret_838 = __builtin_shufflevector(__ret_838, __ret_838, 1, 0); \ - __ret_838; \ +#define vmlal_lane_u32(__p0_854, __p1_854, __p2_854, __p3_854) __extension__ ({ \ + uint64x2_t __ret_854; \ + uint64x2_t __s0_854 = __p0_854; \ + uint32x2_t __s1_854 = __p1_854; \ + uint32x2_t __s2_854 = __p2_854; \ + uint64x2_t __rev0_854; __rev0_854 = __builtin_shufflevector(__s0_854, __s0_854, __lane_reverse_128_64); \ + uint32x2_t __rev1_854; __rev1_854 = __builtin_shufflevector(__s1_854, __s1_854, __lane_reverse_64_32); \ + uint32x2_t __rev2_854; __rev2_854 = __builtin_shufflevector(__s2_854, __s2_854, __lane_reverse_64_32); \ + __ret_854 = __rev0_854 + __noswap_vmull_u32(__rev1_854, __noswap_splat_lane_u32(__rev2_854, __p3_854)); \ + __ret_854 = __builtin_shufflevector(__ret_854, __ret_854, __lane_reverse_128_64); \ + __ret_854; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_u16(__p0_839, __p1_839, __p2_839, __p3_839) __extension__ ({ \ - uint32x4_t __ret_839; \ - uint32x4_t __s0_839 = __p0_839; \ - uint16x4_t __s1_839 = __p1_839; \ - uint16x4_t __s2_839 = __p2_839; \ - __ret_839 = __s0_839 + vmull_u16(__s1_839, splat_lane_u16(__s2_839, __p3_839)); \ - __ret_839; \ +#define vmlal_lane_u16(__p0_855, __p1_855, __p2_855, __p3_855) __extension__ ({ \ + uint32x4_t __ret_855; \ + uint32x4_t __s0_855 = __p0_855; \ + uint16x4_t __s1_855 = __p1_855; \ + uint16x4_t __s2_855 = __p2_855; \ + __ret_855 = __s0_855 + vmull_u16(__s1_855, splat_lane_u16(__s2_855, __p3_855)); \ + __ret_855; \ }) #else -#define vmlal_lane_u16(__p0_840, __p1_840, __p2_840, __p3_840) __extension__ ({ \ - uint32x4_t __ret_840; \ - uint32x4_t __s0_840 = __p0_840; \ - uint16x4_t __s1_840 = __p1_840; \ - uint16x4_t __s2_840 = __p2_840; \ - uint32x4_t __rev0_840; __rev0_840 = __builtin_shufflevector(__s0_840, __s0_840, 3, 2, 1, 0); \ - uint16x4_t __rev1_840; __rev1_840 = __builtin_shufflevector(__s1_840, __s1_840, 3, 2, 1, 0); \ - uint16x4_t __rev2_840; __rev2_840 = __builtin_shufflevector(__s2_840, __s2_840, 3, 2, 1, 0); \ - __ret_840 = __rev0_840 + __noswap_vmull_u16(__rev1_840, 
__noswap_splat_lane_u16(__rev2_840, __p3_840)); \ - __ret_840 = __builtin_shufflevector(__ret_840, __ret_840, 3, 2, 1, 0); \ - __ret_840; \ +#define vmlal_lane_u16(__p0_856, __p1_856, __p2_856, __p3_856) __extension__ ({ \ + uint32x4_t __ret_856; \ + uint32x4_t __s0_856 = __p0_856; \ + uint16x4_t __s1_856 = __p1_856; \ + uint16x4_t __s2_856 = __p2_856; \ + uint32x4_t __rev0_856; __rev0_856 = __builtin_shufflevector(__s0_856, __s0_856, __lane_reverse_128_32); \ + uint16x4_t __rev1_856; __rev1_856 = __builtin_shufflevector(__s1_856, __s1_856, __lane_reverse_64_16); \ + uint16x4_t __rev2_856; __rev2_856 = __builtin_shufflevector(__s2_856, __s2_856, __lane_reverse_64_16); \ + __ret_856 = __rev0_856 + __noswap_vmull_u16(__rev1_856, __noswap_splat_lane_u16(__rev2_856, __p3_856)); \ + __ret_856 = __builtin_shufflevector(__ret_856, __ret_856, __lane_reverse_128_32); \ + __ret_856; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_s32(__p0_841, __p1_841, __p2_841, __p3_841) __extension__ ({ \ - int64x2_t __ret_841; \ - int64x2_t __s0_841 = __p0_841; \ - int32x2_t __s1_841 = __p1_841; \ - int32x2_t __s2_841 = __p2_841; \ - __ret_841 = __s0_841 + vmull_s32(__s1_841, splat_lane_s32(__s2_841, __p3_841)); \ - __ret_841; \ +#define vmlal_lane_s32(__p0_857, __p1_857, __p2_857, __p3_857) __extension__ ({ \ + int64x2_t __ret_857; \ + int64x2_t __s0_857 = __p0_857; \ + int32x2_t __s1_857 = __p1_857; \ + int32x2_t __s2_857 = __p2_857; \ + __ret_857 = __s0_857 + vmull_s32(__s1_857, splat_lane_s32(__s2_857, __p3_857)); \ + __ret_857; \ }) #else -#define vmlal_lane_s32(__p0_842, __p1_842, __p2_842, __p3_842) __extension__ ({ \ - int64x2_t __ret_842; \ - int64x2_t __s0_842 = __p0_842; \ - int32x2_t __s1_842 = __p1_842; \ - int32x2_t __s2_842 = __p2_842; \ - int64x2_t __rev0_842; __rev0_842 = __builtin_shufflevector(__s0_842, __s0_842, 1, 0); \ - int32x2_t __rev1_842; __rev1_842 = __builtin_shufflevector(__s1_842, __s1_842, 1, 0); \ - int32x2_t __rev2_842; __rev2_842 = __builtin_shufflevector(__s2_842, __s2_842, 1, 0); \ - __ret_842 = __rev0_842 + __noswap_vmull_s32(__rev1_842, __noswap_splat_lane_s32(__rev2_842, __p3_842)); \ - __ret_842 = __builtin_shufflevector(__ret_842, __ret_842, 1, 0); \ - __ret_842; \ +#define vmlal_lane_s32(__p0_858, __p1_858, __p2_858, __p3_858) __extension__ ({ \ + int64x2_t __ret_858; \ + int64x2_t __s0_858 = __p0_858; \ + int32x2_t __s1_858 = __p1_858; \ + int32x2_t __s2_858 = __p2_858; \ + int64x2_t __rev0_858; __rev0_858 = __builtin_shufflevector(__s0_858, __s0_858, __lane_reverse_128_64); \ + int32x2_t __rev1_858; __rev1_858 = __builtin_shufflevector(__s1_858, __s1_858, __lane_reverse_64_32); \ + int32x2_t __rev2_858; __rev2_858 = __builtin_shufflevector(__s2_858, __s2_858, __lane_reverse_64_32); \ + __ret_858 = __rev0_858 + __noswap_vmull_s32(__rev1_858, __noswap_splat_lane_s32(__rev2_858, __p3_858)); \ + __ret_858 = __builtin_shufflevector(__ret_858, __ret_858, __lane_reverse_128_64); \ + __ret_858; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_s16(__p0_843, __p1_843, __p2_843, __p3_843) __extension__ ({ \ - int32x4_t __ret_843; \ - int32x4_t __s0_843 = __p0_843; \ - int16x4_t __s1_843 = __p1_843; \ - int16x4_t __s2_843 = __p2_843; \ - __ret_843 = __s0_843 + vmull_s16(__s1_843, splat_lane_s16(__s2_843, __p3_843)); \ - __ret_843; \ +#define vmlal_lane_s16(__p0_859, __p1_859, __p2_859, __p3_859) __extension__ ({ \ + int32x4_t __ret_859; \ + int32x4_t __s0_859 = __p0_859; \ + int16x4_t __s1_859 = __p1_859; \ + int16x4_t __s2_859 = __p2_859; \ + __ret_859 = __s0_859 
+ vmull_s16(__s1_859, splat_lane_s16(__s2_859, __p3_859)); \ + __ret_859; \ }) #else -#define vmlal_lane_s16(__p0_844, __p1_844, __p2_844, __p3_844) __extension__ ({ \ - int32x4_t __ret_844; \ - int32x4_t __s0_844 = __p0_844; \ - int16x4_t __s1_844 = __p1_844; \ - int16x4_t __s2_844 = __p2_844; \ - int32x4_t __rev0_844; __rev0_844 = __builtin_shufflevector(__s0_844, __s0_844, 3, 2, 1, 0); \ - int16x4_t __rev1_844; __rev1_844 = __builtin_shufflevector(__s1_844, __s1_844, 3, 2, 1, 0); \ - int16x4_t __rev2_844; __rev2_844 = __builtin_shufflevector(__s2_844, __s2_844, 3, 2, 1, 0); \ - __ret_844 = __rev0_844 + __noswap_vmull_s16(__rev1_844, __noswap_splat_lane_s16(__rev2_844, __p3_844)); \ - __ret_844 = __builtin_shufflevector(__ret_844, __ret_844, 3, 2, 1, 0); \ - __ret_844; \ +#define vmlal_lane_s16(__p0_860, __p1_860, __p2_860, __p3_860) __extension__ ({ \ + int32x4_t __ret_860; \ + int32x4_t __s0_860 = __p0_860; \ + int16x4_t __s1_860 = __p1_860; \ + int16x4_t __s2_860 = __p2_860; \ + int32x4_t __rev0_860; __rev0_860 = __builtin_shufflevector(__s0_860, __s0_860, __lane_reverse_128_32); \ + int16x4_t __rev1_860; __rev1_860 = __builtin_shufflevector(__s1_860, __s1_860, __lane_reverse_64_16); \ + int16x4_t __rev2_860; __rev2_860 = __builtin_shufflevector(__s2_860, __s2_860, __lane_reverse_64_16); \ + __ret_860 = __rev0_860 + __noswap_vmull_s16(__rev1_860, __noswap_splat_lane_s16(__rev2_860, __p3_860)); \ + __ret_860 = __builtin_shufflevector(__ret_860, __ret_860, __lane_reverse_128_32); \ + __ret_860; \ }) #endif @@ -69566,10 +72084,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlal_n_u32(uint64x2_t __p0, uin #else __ai __attribute__((target("neon"))) uint64x2_t vmlal_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __noswap_vmull_u32(__rev1, (uint32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmlal_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { @@ -69588,10 +72106,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlal_n_u16(uint32x4_t __p0, uin #else __ai __attribute__((target("neon"))) uint32x4_t vmlal_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __noswap_vmull_u16(__rev1, (uint16x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmlal_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { @@ -69610,10 +72128,10 @@ __ai __attribute__((target("neon"))) int64x2_t vmlal_n_s32(int64x2_t __p0, int32 #else __ai __attribute__((target("neon"))) int64x2_t vmlal_n_s32(int64x2_t 
__p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 + __noswap_vmull_s32(__rev1, (int32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { @@ -69632,10 +72150,10 @@ __ai __attribute__((target("neon"))) int32x4_t vmlal_n_s16(int32x4_t __p0, int16 #else __ai __attribute__((target("neon"))) int32x4_t vmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 + __noswap_vmull_s16(__rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { @@ -69654,11 +72172,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlsl_u8(uint16x8_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint16x8_t vmlsl_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 - __noswap_vmull_u8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vmlsl_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { @@ -69677,11 +72195,11 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlsl_u32(uint64x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint64x2_t vmlsl_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 - __noswap_vmull_u32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, 
__ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmlsl_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { @@ -69700,11 +72218,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlsl_u16(uint32x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint32x4_t vmlsl_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 - __noswap_vmull_u16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmlsl_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { @@ -69723,11 +72241,11 @@ __ai __attribute__((target("neon"))) int16x8_t vmlsl_s8(int16x8_t __p0, int8x8_t #else __ai __attribute__((target("neon"))) int16x8_t vmlsl_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 - __noswap_vmull_s8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vmlsl_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { @@ -69746,11 +72264,11 @@ __ai __attribute__((target("neon"))) int64x2_t vmlsl_s32(int64x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int64x2_t vmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 - __noswap_vmull_s32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { @@ -69769,11 +72287,11 @@ __ai __attribute__((target("neon"))) int32x4_t vmlsl_s16(int32x4_t __p0, int16x4 #else __ai 
__attribute__((target("neon"))) int32x4_t vmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 - __noswap_vmull_s16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { @@ -69784,98 +72302,98 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlsl_s16(int32x4_t __p0 #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_u32(__p0_845, __p1_845, __p2_845, __p3_845) __extension__ ({ \ - uint64x2_t __ret_845; \ - uint64x2_t __s0_845 = __p0_845; \ - uint32x2_t __s1_845 = __p1_845; \ - uint32x2_t __s2_845 = __p2_845; \ - __ret_845 = __s0_845 - vmull_u32(__s1_845, splat_lane_u32(__s2_845, __p3_845)); \ - __ret_845; \ +#define vmlsl_lane_u32(__p0_861, __p1_861, __p2_861, __p3_861) __extension__ ({ \ + uint64x2_t __ret_861; \ + uint64x2_t __s0_861 = __p0_861; \ + uint32x2_t __s1_861 = __p1_861; \ + uint32x2_t __s2_861 = __p2_861; \ + __ret_861 = __s0_861 - vmull_u32(__s1_861, splat_lane_u32(__s2_861, __p3_861)); \ + __ret_861; \ }) #else -#define vmlsl_lane_u32(__p0_846, __p1_846, __p2_846, __p3_846) __extension__ ({ \ - uint64x2_t __ret_846; \ - uint64x2_t __s0_846 = __p0_846; \ - uint32x2_t __s1_846 = __p1_846; \ - uint32x2_t __s2_846 = __p2_846; \ - uint64x2_t __rev0_846; __rev0_846 = __builtin_shufflevector(__s0_846, __s0_846, 1, 0); \ - uint32x2_t __rev1_846; __rev1_846 = __builtin_shufflevector(__s1_846, __s1_846, 1, 0); \ - uint32x2_t __rev2_846; __rev2_846 = __builtin_shufflevector(__s2_846, __s2_846, 1, 0); \ - __ret_846 = __rev0_846 - __noswap_vmull_u32(__rev1_846, __noswap_splat_lane_u32(__rev2_846, __p3_846)); \ - __ret_846 = __builtin_shufflevector(__ret_846, __ret_846, 1, 0); \ - __ret_846; \ +#define vmlsl_lane_u32(__p0_862, __p1_862, __p2_862, __p3_862) __extension__ ({ \ + uint64x2_t __ret_862; \ + uint64x2_t __s0_862 = __p0_862; \ + uint32x2_t __s1_862 = __p1_862; \ + uint32x2_t __s2_862 = __p2_862; \ + uint64x2_t __rev0_862; __rev0_862 = __builtin_shufflevector(__s0_862, __s0_862, __lane_reverse_128_64); \ + uint32x2_t __rev1_862; __rev1_862 = __builtin_shufflevector(__s1_862, __s1_862, __lane_reverse_64_32); \ + uint32x2_t __rev2_862; __rev2_862 = __builtin_shufflevector(__s2_862, __s2_862, __lane_reverse_64_32); \ + __ret_862 = __rev0_862 - __noswap_vmull_u32(__rev1_862, __noswap_splat_lane_u32(__rev2_862, __p3_862)); \ + __ret_862 = __builtin_shufflevector(__ret_862, __ret_862, __lane_reverse_128_64); \ + __ret_862; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_u16(__p0_847, __p1_847, __p2_847, __p3_847) __extension__ ({ \ - uint32x4_t __ret_847; \ - uint32x4_t __s0_847 = __p0_847; \ - uint16x4_t __s1_847 = __p1_847; \ - uint16x4_t __s2_847 = __p2_847; \ - __ret_847 = __s0_847 - vmull_u16(__s1_847, splat_lane_u16(__s2_847, __p3_847)); \ - __ret_847; \ +#define vmlsl_lane_u16(__p0_863, __p1_863, __p2_863, 
__p3_863) __extension__ ({ \ + uint32x4_t __ret_863; \ + uint32x4_t __s0_863 = __p0_863; \ + uint16x4_t __s1_863 = __p1_863; \ + uint16x4_t __s2_863 = __p2_863; \ + __ret_863 = __s0_863 - vmull_u16(__s1_863, splat_lane_u16(__s2_863, __p3_863)); \ + __ret_863; \ }) #else -#define vmlsl_lane_u16(__p0_848, __p1_848, __p2_848, __p3_848) __extension__ ({ \ - uint32x4_t __ret_848; \ - uint32x4_t __s0_848 = __p0_848; \ - uint16x4_t __s1_848 = __p1_848; \ - uint16x4_t __s2_848 = __p2_848; \ - uint32x4_t __rev0_848; __rev0_848 = __builtin_shufflevector(__s0_848, __s0_848, 3, 2, 1, 0); \ - uint16x4_t __rev1_848; __rev1_848 = __builtin_shufflevector(__s1_848, __s1_848, 3, 2, 1, 0); \ - uint16x4_t __rev2_848; __rev2_848 = __builtin_shufflevector(__s2_848, __s2_848, 3, 2, 1, 0); \ - __ret_848 = __rev0_848 - __noswap_vmull_u16(__rev1_848, __noswap_splat_lane_u16(__rev2_848, __p3_848)); \ - __ret_848 = __builtin_shufflevector(__ret_848, __ret_848, 3, 2, 1, 0); \ - __ret_848; \ +#define vmlsl_lane_u16(__p0_864, __p1_864, __p2_864, __p3_864) __extension__ ({ \ + uint32x4_t __ret_864; \ + uint32x4_t __s0_864 = __p0_864; \ + uint16x4_t __s1_864 = __p1_864; \ + uint16x4_t __s2_864 = __p2_864; \ + uint32x4_t __rev0_864; __rev0_864 = __builtin_shufflevector(__s0_864, __s0_864, __lane_reverse_128_32); \ + uint16x4_t __rev1_864; __rev1_864 = __builtin_shufflevector(__s1_864, __s1_864, __lane_reverse_64_16); \ + uint16x4_t __rev2_864; __rev2_864 = __builtin_shufflevector(__s2_864, __s2_864, __lane_reverse_64_16); \ + __ret_864 = __rev0_864 - __noswap_vmull_u16(__rev1_864, __noswap_splat_lane_u16(__rev2_864, __p3_864)); \ + __ret_864 = __builtin_shufflevector(__ret_864, __ret_864, __lane_reverse_128_32); \ + __ret_864; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_s32(__p0_849, __p1_849, __p2_849, __p3_849) __extension__ ({ \ - int64x2_t __ret_849; \ - int64x2_t __s0_849 = __p0_849; \ - int32x2_t __s1_849 = __p1_849; \ - int32x2_t __s2_849 = __p2_849; \ - __ret_849 = __s0_849 - vmull_s32(__s1_849, splat_lane_s32(__s2_849, __p3_849)); \ - __ret_849; \ +#define vmlsl_lane_s32(__p0_865, __p1_865, __p2_865, __p3_865) __extension__ ({ \ + int64x2_t __ret_865; \ + int64x2_t __s0_865 = __p0_865; \ + int32x2_t __s1_865 = __p1_865; \ + int32x2_t __s2_865 = __p2_865; \ + __ret_865 = __s0_865 - vmull_s32(__s1_865, splat_lane_s32(__s2_865, __p3_865)); \ + __ret_865; \ }) #else -#define vmlsl_lane_s32(__p0_850, __p1_850, __p2_850, __p3_850) __extension__ ({ \ - int64x2_t __ret_850; \ - int64x2_t __s0_850 = __p0_850; \ - int32x2_t __s1_850 = __p1_850; \ - int32x2_t __s2_850 = __p2_850; \ - int64x2_t __rev0_850; __rev0_850 = __builtin_shufflevector(__s0_850, __s0_850, 1, 0); \ - int32x2_t __rev1_850; __rev1_850 = __builtin_shufflevector(__s1_850, __s1_850, 1, 0); \ - int32x2_t __rev2_850; __rev2_850 = __builtin_shufflevector(__s2_850, __s2_850, 1, 0); \ - __ret_850 = __rev0_850 - __noswap_vmull_s32(__rev1_850, __noswap_splat_lane_s32(__rev2_850, __p3_850)); \ - __ret_850 = __builtin_shufflevector(__ret_850, __ret_850, 1, 0); \ - __ret_850; \ +#define vmlsl_lane_s32(__p0_866, __p1_866, __p2_866, __p3_866) __extension__ ({ \ + int64x2_t __ret_866; \ + int64x2_t __s0_866 = __p0_866; \ + int32x2_t __s1_866 = __p1_866; \ + int32x2_t __s2_866 = __p2_866; \ + int64x2_t __rev0_866; __rev0_866 = __builtin_shufflevector(__s0_866, __s0_866, __lane_reverse_128_64); \ + int32x2_t __rev1_866; __rev1_866 = __builtin_shufflevector(__s1_866, __s1_866, __lane_reverse_64_32); \ + int32x2_t __rev2_866; __rev2_866 = 
__builtin_shufflevector(__s2_866, __s2_866, __lane_reverse_64_32); \ + __ret_866 = __rev0_866 - __noswap_vmull_s32(__rev1_866, __noswap_splat_lane_s32(__rev2_866, __p3_866)); \ + __ret_866 = __builtin_shufflevector(__ret_866, __ret_866, __lane_reverse_128_64); \ + __ret_866; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_s16(__p0_851, __p1_851, __p2_851, __p3_851) __extension__ ({ \ - int32x4_t __ret_851; \ - int32x4_t __s0_851 = __p0_851; \ - int16x4_t __s1_851 = __p1_851; \ - int16x4_t __s2_851 = __p2_851; \ - __ret_851 = __s0_851 - vmull_s16(__s1_851, splat_lane_s16(__s2_851, __p3_851)); \ - __ret_851; \ +#define vmlsl_lane_s16(__p0_867, __p1_867, __p2_867, __p3_867) __extension__ ({ \ + int32x4_t __ret_867; \ + int32x4_t __s0_867 = __p0_867; \ + int16x4_t __s1_867 = __p1_867; \ + int16x4_t __s2_867 = __p2_867; \ + __ret_867 = __s0_867 - vmull_s16(__s1_867, splat_lane_s16(__s2_867, __p3_867)); \ + __ret_867; \ }) #else -#define vmlsl_lane_s16(__p0_852, __p1_852, __p2_852, __p3_852) __extension__ ({ \ - int32x4_t __ret_852; \ - int32x4_t __s0_852 = __p0_852; \ - int16x4_t __s1_852 = __p1_852; \ - int16x4_t __s2_852 = __p2_852; \ - int32x4_t __rev0_852; __rev0_852 = __builtin_shufflevector(__s0_852, __s0_852, 3, 2, 1, 0); \ - int16x4_t __rev1_852; __rev1_852 = __builtin_shufflevector(__s1_852, __s1_852, 3, 2, 1, 0); \ - int16x4_t __rev2_852; __rev2_852 = __builtin_shufflevector(__s2_852, __s2_852, 3, 2, 1, 0); \ - __ret_852 = __rev0_852 - __noswap_vmull_s16(__rev1_852, __noswap_splat_lane_s16(__rev2_852, __p3_852)); \ - __ret_852 = __builtin_shufflevector(__ret_852, __ret_852, 3, 2, 1, 0); \ - __ret_852; \ +#define vmlsl_lane_s16(__p0_868, __p1_868, __p2_868, __p3_868) __extension__ ({ \ + int32x4_t __ret_868; \ + int32x4_t __s0_868 = __p0_868; \ + int16x4_t __s1_868 = __p1_868; \ + int16x4_t __s2_868 = __p2_868; \ + int32x4_t __rev0_868; __rev0_868 = __builtin_shufflevector(__s0_868, __s0_868, __lane_reverse_128_32); \ + int16x4_t __rev1_868; __rev1_868 = __builtin_shufflevector(__s1_868, __s1_868, __lane_reverse_64_16); \ + int16x4_t __rev2_868; __rev2_868 = __builtin_shufflevector(__s2_868, __s2_868, __lane_reverse_64_16); \ + __ret_868 = __rev0_868 - __noswap_vmull_s16(__rev1_868, __noswap_splat_lane_s16(__rev2_868, __p3_868)); \ + __ret_868 = __builtin_shufflevector(__ret_868, __ret_868, __lane_reverse_128_32); \ + __ret_868; \ }) #endif @@ -69888,10 +72406,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlsl_n_u32(uint64x2_t __p0, uin #else __ai __attribute__((target("neon"))) uint64x2_t vmlsl_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __noswap_vmull_u32(__rev1, (uint32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vmlsl_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { @@ -69910,10 +72428,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlsl_n_u16(uint32x4_t __p0, uin #else __ai __attribute__((target("neon"))) uint32x4_t vmlsl_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; - uint32x4_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __noswap_vmull_u16(__rev1, (uint16x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vmlsl_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { @@ -69932,10 +72450,10 @@ __ai __attribute__((target("neon"))) int64x2_t vmlsl_n_s32(int64x2_t __p0, int32 #else __ai __attribute__((target("neon"))) int64x2_t vmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); __ret = __rev0 - __noswap_vmull_s32(__rev1, (int32x2_t) {__p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { @@ -69954,10 +72472,10 @@ __ai __attribute__((target("neon"))) int32x4_t vmlsl_n_s16(int32x4_t __p0, int16 #else __ai __attribute__((target("neon"))) int32x4_t vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); __ret = __rev0 - __noswap_vmull_s16(__rev1, (int16x4_t) {__p2, __p2, __p2, __p2}); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { @@ -69968,42 +72486,42 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlsl_n_s16(int32x4_t __ #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_f16(__p0_853, __p1_853, __p2_853) __extension__ ({ \ - float16x4_t __ret_853; \ - float16_t __s0_853 = __p0_853; \ - float16x4_t __s1_853 = __p1_853; \ - __ret_853 = __builtin_bit_cast(float16x4_t, vset_lane_s16(__builtin_bit_cast(int16_t, __s0_853), __builtin_bit_cast(int16x4_t, __s1_853), __p2_853)); \ - __ret_853; \ +#define vset_lane_f16(__p0_869, __p1_869, __p2_869) __extension__ ({ \ + float16x4_t __ret_869; \ + float16_t __s0_869 = __p0_869; \ + float16x4_t __s1_869 = __p1_869; \ + __ret_869 = __builtin_bit_cast(float16x4_t, vset_lane_s16(__builtin_bit_cast(int16_t, __s0_869), __builtin_bit_cast(int16x4_t, __s1_869), __p2_869)); \ + __ret_869; \ }) #else -#define vset_lane_f16(__p0_854, __p1_854, __p2_854) __extension__ ({ \ - float16x4_t __ret_854; \ - float16_t __s0_854 = __p0_854; \ - float16x4_t __s1_854 = __p1_854; \ - float16x4_t __rev1_854; __rev1_854 = __builtin_shufflevector(__s1_854, 
__s1_854, 3, 2, 1, 0); \ - __ret_854 = __builtin_bit_cast(float16x4_t, __noswap_vset_lane_s16(__builtin_bit_cast(int16_t, __s0_854), __builtin_bit_cast(int16x4_t, __rev1_854), __p2_854)); \ - __ret_854 = __builtin_shufflevector(__ret_854, __ret_854, 3, 2, 1, 0); \ - __ret_854; \ +#define vset_lane_f16(__p0_870, __p1_870, __p2_870) __extension__ ({ \ + float16x4_t __ret_870; \ + float16_t __s0_870 = __p0_870; \ + float16x4_t __s1_870 = __p1_870; \ + float16x4_t __rev1_870; __rev1_870 = __builtin_shufflevector(__s1_870, __s1_870, __lane_reverse_64_16); \ + __ret_870 = __builtin_bit_cast(float16x4_t, __noswap_vset_lane_s16(__builtin_bit_cast(int16_t, __s0_870), __builtin_bit_cast(int16x4_t, __rev1_870), __p2_870)); \ + __ret_870 = __builtin_shufflevector(__ret_870, __ret_870, __lane_reverse_64_16); \ + __ret_870; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_f16(__p0_855, __p1_855, __p2_855) __extension__ ({ \ - float16x8_t __ret_855; \ - float16_t __s0_855 = __p0_855; \ - float16x8_t __s1_855 = __p1_855; \ - __ret_855 = __builtin_bit_cast(float16x8_t, vsetq_lane_s16(__builtin_bit_cast(int16_t, __s0_855), __builtin_bit_cast(int16x8_t, __s1_855), __p2_855)); \ - __ret_855; \ +#define vsetq_lane_f16(__p0_871, __p1_871, __p2_871) __extension__ ({ \ + float16x8_t __ret_871; \ + float16_t __s0_871 = __p0_871; \ + float16x8_t __s1_871 = __p1_871; \ + __ret_871 = __builtin_bit_cast(float16x8_t, vsetq_lane_s16(__builtin_bit_cast(int16_t, __s0_871), __builtin_bit_cast(int16x8_t, __s1_871), __p2_871)); \ + __ret_871; \ }) #else -#define vsetq_lane_f16(__p0_856, __p1_856, __p2_856) __extension__ ({ \ - float16x8_t __ret_856; \ - float16_t __s0_856 = __p0_856; \ - float16x8_t __s1_856 = __p1_856; \ - float16x8_t __rev1_856; __rev1_856 = __builtin_shufflevector(__s1_856, __s1_856, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_856 = __builtin_bit_cast(float16x8_t, __noswap_vsetq_lane_s16(__builtin_bit_cast(int16_t, __s0_856), __builtin_bit_cast(int16x8_t, __rev1_856), __p2_856)); \ - __ret_856 = __builtin_shufflevector(__ret_856, __ret_856, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_856; \ +#define vsetq_lane_f16(__p0_872, __p1_872, __p2_872) __extension__ ({ \ + float16x8_t __ret_872; \ + float16_t __s0_872 = __p0_872; \ + float16x8_t __s1_872 = __p1_872; \ + float16x8_t __rev1_872; __rev1_872 = __builtin_shufflevector(__s1_872, __s1_872, __lane_reverse_128_16); \ + __ret_872 = __builtin_bit_cast(float16x8_t, __noswap_vsetq_lane_s16(__builtin_bit_cast(int16_t, __s0_872), __builtin_bit_cast(int16x8_t, __rev1_872), __p2_872)); \ + __ret_872 = __builtin_shufflevector(__ret_872, __ret_872, __lane_reverse_128_16); \ + __ret_872; \ }) #endif @@ -70011,441 +72529,441 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlsl_n_s16(int32x4_t __ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("aes,neon"))) poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { poly128_t __ret; - __ret = vmull_p64((poly64_t)(vget_high_p64(__p0)), (poly64_t)(vget_high_p64(__p1))); + __ret = vmull_p64(__builtin_bit_cast(poly64_t, vget_high_p64(__p0)), __builtin_bit_cast(poly64_t, vget_high_p64(__p1))); return __ret; } #else __ai __attribute__((target("aes,neon"))) poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { poly128_t __ret; - poly64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - __ret = vmull_p64((poly64_t)(__noswap_vget_high_p64(__rev0)), (poly64_t)(__noswap_vget_high_p64(__rev1))); + poly64x2_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + poly64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_64); + __ret = vmull_p64(__builtin_bit_cast(poly64_t, __noswap_vget_high_p64(__rev0)), __builtin_bit_cast(poly64_t, __noswap_vget_high_p64(__rev1))); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_high_f16(__p0_857, __p1_857, __p2_857, __p3_857) __extension__ ({ \ - float32x4_t __ret_857; \ - float32x4_t __s0_857 = __p0_857; \ - float16x8_t __s1_857 = __p1_857; \ - float16x4_t __s2_857 = __p2_857; \ - __ret_857 = vfmlalq_high_f16(__s0_857, __s1_857, (float16x8_t) {vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857)}); \ - __ret_857; \ -}) -#else -#define vfmlalq_lane_high_f16(__p0_858, __p1_858, __p2_858, __p3_858) __extension__ ({ \ - float32x4_t __ret_858; \ - float32x4_t __s0_858 = __p0_858; \ - float16x8_t __s1_858 = __p1_858; \ - float16x4_t __s2_858 = __p2_858; \ - float32x4_t __rev0_858; __rev0_858 = __builtin_shufflevector(__s0_858, __s0_858, 3, 2, 1, 0); \ - float16x8_t __rev1_858; __rev1_858 = __builtin_shufflevector(__s1_858, __s1_858, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_858; __rev2_858 = __builtin_shufflevector(__s2_858, __s2_858, 3, 2, 1, 0); \ - __ret_858 = __noswap_vfmlalq_high_f16(__rev0_858, __rev1_858, (float16x8_t) {__noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858)}); \ - __ret_858 = __builtin_shufflevector(__ret_858, __ret_858, 3, 2, 1, 0); \ - __ret_858; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_high_f16(__p0_859, __p1_859, __p2_859, __p3_859) __extension__ ({ \ - float32x2_t __ret_859; \ - float32x2_t __s0_859 = __p0_859; \ - float16x4_t __s1_859 = __p1_859; \ - float16x4_t __s2_859 = __p2_859; \ - __ret_859 = vfmlal_high_f16(__s0_859, __s1_859, (float16x4_t) {vget_lane_f16(__s2_859, __p3_859), vget_lane_f16(__s2_859, __p3_859), vget_lane_f16(__s2_859, __p3_859), vget_lane_f16(__s2_859, __p3_859)}); \ - __ret_859; \ -}) -#else -#define vfmlal_lane_high_f16(__p0_860, __p1_860, __p2_860, __p3_860) __extension__ ({ \ - float32x2_t __ret_860; \ - float32x2_t __s0_860 = __p0_860; \ - float16x4_t __s1_860 = __p1_860; \ - float16x4_t __s2_860 = __p2_860; \ - float32x2_t __rev0_860; __rev0_860 = __builtin_shufflevector(__s0_860, __s0_860, 1, 0); \ - float16x4_t __rev1_860; __rev1_860 = __builtin_shufflevector(__s1_860, __s1_860, 3, 2, 1, 0); \ - float16x4_t __rev2_860; __rev2_860 = __builtin_shufflevector(__s2_860, __s2_860, 3, 2, 1, 0); \ - __ret_860 = __noswap_vfmlal_high_f16(__rev0_860, __rev1_860, (float16x4_t) {__noswap_vget_lane_f16(__rev2_860, __p3_860), __noswap_vget_lane_f16(__rev2_860, __p3_860), __noswap_vget_lane_f16(__rev2_860, __p3_860), __noswap_vget_lane_f16(__rev2_860, __p3_860)}); \ - __ret_860 = __builtin_shufflevector(__ret_860, __ret_860, 1, 0); \ - __ret_860; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_low_f16(__p0_861, __p1_861, __p2_861, __p3_861) __extension__ ({ \ - float32x4_t __ret_861; \ - 
float32x4_t __s0_861 = __p0_861; \ - float16x8_t __s1_861 = __p1_861; \ - float16x4_t __s2_861 = __p2_861; \ - __ret_861 = vfmlalq_low_f16(__s0_861, __s1_861, (float16x8_t) {vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861)}); \ - __ret_861; \ -}) -#else -#define vfmlalq_lane_low_f16(__p0_862, __p1_862, __p2_862, __p3_862) __extension__ ({ \ - float32x4_t __ret_862; \ - float32x4_t __s0_862 = __p0_862; \ - float16x8_t __s1_862 = __p1_862; \ - float16x4_t __s2_862 = __p2_862; \ - float32x4_t __rev0_862; __rev0_862 = __builtin_shufflevector(__s0_862, __s0_862, 3, 2, 1, 0); \ - float16x8_t __rev1_862; __rev1_862 = __builtin_shufflevector(__s1_862, __s1_862, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_862; __rev2_862 = __builtin_shufflevector(__s2_862, __s2_862, 3, 2, 1, 0); \ - __ret_862 = __noswap_vfmlalq_low_f16(__rev0_862, __rev1_862, (float16x8_t) {__noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862)}); \ - __ret_862 = __builtin_shufflevector(__ret_862, __ret_862, 3, 2, 1, 0); \ - __ret_862; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_low_f16(__p0_863, __p1_863, __p2_863, __p3_863) __extension__ ({ \ - float32x2_t __ret_863; \ - float32x2_t __s0_863 = __p0_863; \ - float16x4_t __s1_863 = __p1_863; \ - float16x4_t __s2_863 = __p2_863; \ - __ret_863 = vfmlal_low_f16(__s0_863, __s1_863, (float16x4_t) {vget_lane_f16(__s2_863, __p3_863), vget_lane_f16(__s2_863, __p3_863), vget_lane_f16(__s2_863, __p3_863), vget_lane_f16(__s2_863, __p3_863)}); \ - __ret_863; \ -}) -#else -#define vfmlal_lane_low_f16(__p0_864, __p1_864, __p2_864, __p3_864) __extension__ ({ \ - float32x2_t __ret_864; \ - float32x2_t __s0_864 = __p0_864; \ - float16x4_t __s1_864 = __p1_864; \ - float16x4_t __s2_864 = __p2_864; \ - float32x2_t __rev0_864; __rev0_864 = __builtin_shufflevector(__s0_864, __s0_864, 1, 0); \ - float16x4_t __rev1_864; __rev1_864 = __builtin_shufflevector(__s1_864, __s1_864, 3, 2, 1, 0); \ - float16x4_t __rev2_864; __rev2_864 = __builtin_shufflevector(__s2_864, __s2_864, 3, 2, 1, 0); \ - __ret_864 = __noswap_vfmlal_low_f16(__rev0_864, __rev1_864, (float16x4_t) {__noswap_vget_lane_f16(__rev2_864, __p3_864), __noswap_vget_lane_f16(__rev2_864, __p3_864), __noswap_vget_lane_f16(__rev2_864, __p3_864), __noswap_vget_lane_f16(__rev2_864, __p3_864)}); \ - __ret_864 = __builtin_shufflevector(__ret_864, __ret_864, 1, 0); \ - __ret_864; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_high_f16(__p0_865, __p1_865, __p2_865, __p3_865) __extension__ ({ \ - float32x4_t __ret_865; \ - float32x4_t __s0_865 = __p0_865; \ - float16x8_t __s1_865 = __p1_865; \ - float16x8_t __s2_865 = __p2_865; \ - __ret_865 = vfmlalq_high_f16(__s0_865, __s1_865, (float16x8_t) {vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865)}); \ - 
__ret_865; \ -}) -#else -#define vfmlalq_laneq_high_f16(__p0_866, __p1_866, __p2_866, __p3_866) __extension__ ({ \ - float32x4_t __ret_866; \ - float32x4_t __s0_866 = __p0_866; \ - float16x8_t __s1_866 = __p1_866; \ - float16x8_t __s2_866 = __p2_866; \ - float32x4_t __rev0_866; __rev0_866 = __builtin_shufflevector(__s0_866, __s0_866, 3, 2, 1, 0); \ - float16x8_t __rev1_866; __rev1_866 = __builtin_shufflevector(__s1_866, __s1_866, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_866; __rev2_866 = __builtin_shufflevector(__s2_866, __s2_866, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_866 = __noswap_vfmlalq_high_f16(__rev0_866, __rev1_866, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866)}); \ - __ret_866 = __builtin_shufflevector(__ret_866, __ret_866, 3, 2, 1, 0); \ - __ret_866; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_high_f16(__p0_867, __p1_867, __p2_867, __p3_867) __extension__ ({ \ - float32x2_t __ret_867; \ - float32x2_t __s0_867 = __p0_867; \ - float16x4_t __s1_867 = __p1_867; \ - float16x8_t __s2_867 = __p2_867; \ - __ret_867 = vfmlal_high_f16(__s0_867, __s1_867, (float16x4_t) {vgetq_lane_f16(__s2_867, __p3_867), vgetq_lane_f16(__s2_867, __p3_867), vgetq_lane_f16(__s2_867, __p3_867), vgetq_lane_f16(__s2_867, __p3_867)}); \ - __ret_867; \ -}) -#else -#define vfmlal_laneq_high_f16(__p0_868, __p1_868, __p2_868, __p3_868) __extension__ ({ \ - float32x2_t __ret_868; \ - float32x2_t __s0_868 = __p0_868; \ - float16x4_t __s1_868 = __p1_868; \ - float16x8_t __s2_868 = __p2_868; \ - float32x2_t __rev0_868; __rev0_868 = __builtin_shufflevector(__s0_868, __s0_868, 1, 0); \ - float16x4_t __rev1_868; __rev1_868 = __builtin_shufflevector(__s1_868, __s1_868, 3, 2, 1, 0); \ - float16x8_t __rev2_868; __rev2_868 = __builtin_shufflevector(__s2_868, __s2_868, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_868 = __noswap_vfmlal_high_f16(__rev0_868, __rev1_868, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_868, __p3_868), __noswap_vgetq_lane_f16(__rev2_868, __p3_868), __noswap_vgetq_lane_f16(__rev2_868, __p3_868), __noswap_vgetq_lane_f16(__rev2_868, __p3_868)}); \ - __ret_868 = __builtin_shufflevector(__ret_868, __ret_868, 1, 0); \ - __ret_868; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_low_f16(__p0_869, __p1_869, __p2_869, __p3_869) __extension__ ({ \ - float32x4_t __ret_869; \ - float32x4_t __s0_869 = __p0_869; \ - float16x8_t __s1_869 = __p1_869; \ - float16x8_t __s2_869 = __p2_869; \ - __ret_869 = vfmlalq_low_f16(__s0_869, __s1_869, (float16x8_t) {vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869)}); \ - __ret_869; \ -}) -#else -#define vfmlalq_laneq_low_f16(__p0_870, __p1_870, __p2_870, __p3_870) __extension__ ({ \ - float32x4_t __ret_870; \ - float32x4_t __s0_870 = __p0_870; \ - float16x8_t __s1_870 = __p1_870; \ - float16x8_t __s2_870 = __p2_870; \ - float32x4_t __rev0_870; __rev0_870 = __builtin_shufflevector(__s0_870, __s0_870, 3, 2, 1, 0); \ - float16x8_t __rev1_870; __rev1_870 = 
__builtin_shufflevector(__s1_870, __s1_870, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_870; __rev2_870 = __builtin_shufflevector(__s2_870, __s2_870, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_870 = __noswap_vfmlalq_low_f16(__rev0_870, __rev1_870, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870)}); \ - __ret_870 = __builtin_shufflevector(__ret_870, __ret_870, 3, 2, 1, 0); \ - __ret_870; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_low_f16(__p0_871, __p1_871, __p2_871, __p3_871) __extension__ ({ \ - float32x2_t __ret_871; \ - float32x2_t __s0_871 = __p0_871; \ - float16x4_t __s1_871 = __p1_871; \ - float16x8_t __s2_871 = __p2_871; \ - __ret_871 = vfmlal_low_f16(__s0_871, __s1_871, (float16x4_t) {vgetq_lane_f16(__s2_871, __p3_871), vgetq_lane_f16(__s2_871, __p3_871), vgetq_lane_f16(__s2_871, __p3_871), vgetq_lane_f16(__s2_871, __p3_871)}); \ - __ret_871; \ -}) -#else -#define vfmlal_laneq_low_f16(__p0_872, __p1_872, __p2_872, __p3_872) __extension__ ({ \ - float32x2_t __ret_872; \ - float32x2_t __s0_872 = __p0_872; \ - float16x4_t __s1_872 = __p1_872; \ - float16x8_t __s2_872 = __p2_872; \ - float32x2_t __rev0_872; __rev0_872 = __builtin_shufflevector(__s0_872, __s0_872, 1, 0); \ - float16x4_t __rev1_872; __rev1_872 = __builtin_shufflevector(__s1_872, __s1_872, 3, 2, 1, 0); \ - float16x8_t __rev2_872; __rev2_872 = __builtin_shufflevector(__s2_872, __s2_872, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_872 = __noswap_vfmlal_low_f16(__rev0_872, __rev1_872, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_872, __p3_872), __noswap_vgetq_lane_f16(__rev2_872, __p3_872), __noswap_vgetq_lane_f16(__rev2_872, __p3_872), __noswap_vgetq_lane_f16(__rev2_872, __p3_872)}); \ - __ret_872 = __builtin_shufflevector(__ret_872, __ret_872, 1, 0); \ - __ret_872; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_high_f16(__p0_873, __p1_873, __p2_873, __p3_873) __extension__ ({ \ +#define vfmlalq_lane_high_f16(__p0_873, __p1_873, __p2_873, __p3_873) __extension__ ({ \ float32x4_t __ret_873; \ float32x4_t __s0_873 = __p0_873; \ float16x8_t __s1_873 = __p1_873; \ float16x4_t __s2_873 = __p2_873; \ - __ret_873 = vfmlslq_high_f16(__s0_873, __s1_873, (float16x8_t) {vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873)}); \ + __ret_873 = vfmlalq_high_f16(__s0_873, __s1_873, (float16x8_t) {vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873)}); \ __ret_873; \ }) #else -#define vfmlslq_lane_high_f16(__p0_874, __p1_874, __p2_874, __p3_874) __extension__ ({ \ +#define vfmlalq_lane_high_f16(__p0_874, __p1_874, __p2_874, __p3_874) __extension__ ({ \ float32x4_t __ret_874; \ float32x4_t __s0_874 = __p0_874; \ float16x8_t __s1_874 = __p1_874; \ float16x4_t __s2_874 = __p2_874; \ - float32x4_t __rev0_874; __rev0_874 = 
__builtin_shufflevector(__s0_874, __s0_874, 3, 2, 1, 0); \ - float16x8_t __rev1_874; __rev1_874 = __builtin_shufflevector(__s1_874, __s1_874, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_874; __rev2_874 = __builtin_shufflevector(__s2_874, __s2_874, 3, 2, 1, 0); \ - __ret_874 = __noswap_vfmlslq_high_f16(__rev0_874, __rev1_874, (float16x8_t) {__noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874)}); \ - __ret_874 = __builtin_shufflevector(__ret_874, __ret_874, 3, 2, 1, 0); \ + float32x4_t __rev0_874; __rev0_874 = __builtin_shufflevector(__s0_874, __s0_874, __lane_reverse_128_32); \ + float16x8_t __rev1_874; __rev1_874 = __builtin_shufflevector(__s1_874, __s1_874, __lane_reverse_128_16); \ + float16x4_t __rev2_874; __rev2_874 = __builtin_shufflevector(__s2_874, __s2_874, __lane_reverse_64_16); \ + __ret_874 = __noswap_vfmlalq_high_f16(__rev0_874, __rev1_874, (float16x8_t) {__noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874)}); \ + __ret_874 = __builtin_shufflevector(__ret_874, __ret_874, __lane_reverse_128_32); \ __ret_874; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_high_f16(__p0_875, __p1_875, __p2_875, __p3_875) __extension__ ({ \ +#define vfmlal_lane_high_f16(__p0_875, __p1_875, __p2_875, __p3_875) __extension__ ({ \ float32x2_t __ret_875; \ float32x2_t __s0_875 = __p0_875; \ float16x4_t __s1_875 = __p1_875; \ float16x4_t __s2_875 = __p2_875; \ - __ret_875 = vfmlsl_high_f16(__s0_875, __s1_875, (float16x4_t) {vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875)}); \ + __ret_875 = vfmlal_high_f16(__s0_875, __s1_875, (float16x4_t) {vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875)}); \ __ret_875; \ }) #else -#define vfmlsl_lane_high_f16(__p0_876, __p1_876, __p2_876, __p3_876) __extension__ ({ \ +#define vfmlal_lane_high_f16(__p0_876, __p1_876, __p2_876, __p3_876) __extension__ ({ \ float32x2_t __ret_876; \ float32x2_t __s0_876 = __p0_876; \ float16x4_t __s1_876 = __p1_876; \ float16x4_t __s2_876 = __p2_876; \ - float32x2_t __rev0_876; __rev0_876 = __builtin_shufflevector(__s0_876, __s0_876, 1, 0); \ - float16x4_t __rev1_876; __rev1_876 = __builtin_shufflevector(__s1_876, __s1_876, 3, 2, 1, 0); \ - float16x4_t __rev2_876; __rev2_876 = __builtin_shufflevector(__s2_876, __s2_876, 3, 2, 1, 0); \ - __ret_876 = __noswap_vfmlsl_high_f16(__rev0_876, __rev1_876, (float16x4_t) {__noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876)}); \ - __ret_876 = __builtin_shufflevector(__ret_876, __ret_876, 1, 0); \ + float32x2_t __rev0_876; __rev0_876 = __builtin_shufflevector(__s0_876, __s0_876, __lane_reverse_64_32); \ + float16x4_t __rev1_876; __rev1_876 = 
__builtin_shufflevector(__s1_876, __s1_876, __lane_reverse_64_16); \ + float16x4_t __rev2_876; __rev2_876 = __builtin_shufflevector(__s2_876, __s2_876, __lane_reverse_64_16); \ + __ret_876 = __noswap_vfmlal_high_f16(__rev0_876, __rev1_876, (float16x4_t) {__noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876)}); \ + __ret_876 = __builtin_shufflevector(__ret_876, __ret_876, __lane_reverse_64_32); \ __ret_876; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_low_f16(__p0_877, __p1_877, __p2_877, __p3_877) __extension__ ({ \ +#define vfmlalq_lane_low_f16(__p0_877, __p1_877, __p2_877, __p3_877) __extension__ ({ \ float32x4_t __ret_877; \ float32x4_t __s0_877 = __p0_877; \ float16x8_t __s1_877 = __p1_877; \ float16x4_t __s2_877 = __p2_877; \ - __ret_877 = vfmlslq_low_f16(__s0_877, __s1_877, (float16x8_t) {vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877)}); \ + __ret_877 = vfmlalq_low_f16(__s0_877, __s1_877, (float16x8_t) {vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877)}); \ __ret_877; \ }) #else -#define vfmlslq_lane_low_f16(__p0_878, __p1_878, __p2_878, __p3_878) __extension__ ({ \ +#define vfmlalq_lane_low_f16(__p0_878, __p1_878, __p2_878, __p3_878) __extension__ ({ \ float32x4_t __ret_878; \ float32x4_t __s0_878 = __p0_878; \ float16x8_t __s1_878 = __p1_878; \ float16x4_t __s2_878 = __p2_878; \ - float32x4_t __rev0_878; __rev0_878 = __builtin_shufflevector(__s0_878, __s0_878, 3, 2, 1, 0); \ - float16x8_t __rev1_878; __rev1_878 = __builtin_shufflevector(__s1_878, __s1_878, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_878; __rev2_878 = __builtin_shufflevector(__s2_878, __s2_878, 3, 2, 1, 0); \ - __ret_878 = __noswap_vfmlslq_low_f16(__rev0_878, __rev1_878, (float16x8_t) {__noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878)}); \ - __ret_878 = __builtin_shufflevector(__ret_878, __ret_878, 3, 2, 1, 0); \ + float32x4_t __rev0_878; __rev0_878 = __builtin_shufflevector(__s0_878, __s0_878, __lane_reverse_128_32); \ + float16x8_t __rev1_878; __rev1_878 = __builtin_shufflevector(__s1_878, __s1_878, __lane_reverse_128_16); \ + float16x4_t __rev2_878; __rev2_878 = __builtin_shufflevector(__s2_878, __s2_878, __lane_reverse_64_16); \ + __ret_878 = __noswap_vfmlalq_low_f16(__rev0_878, __rev1_878, (float16x8_t) {__noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878)}); \ + __ret_878 = 
__builtin_shufflevector(__ret_878, __ret_878, __lane_reverse_128_32); \ __ret_878; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_low_f16(__p0_879, __p1_879, __p2_879, __p3_879) __extension__ ({ \ +#define vfmlal_lane_low_f16(__p0_879, __p1_879, __p2_879, __p3_879) __extension__ ({ \ float32x2_t __ret_879; \ float32x2_t __s0_879 = __p0_879; \ float16x4_t __s1_879 = __p1_879; \ float16x4_t __s2_879 = __p2_879; \ - __ret_879 = vfmlsl_low_f16(__s0_879, __s1_879, (float16x4_t) {vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879)}); \ + __ret_879 = vfmlal_low_f16(__s0_879, __s1_879, (float16x4_t) {vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879)}); \ __ret_879; \ }) #else -#define vfmlsl_lane_low_f16(__p0_880, __p1_880, __p2_880, __p3_880) __extension__ ({ \ +#define vfmlal_lane_low_f16(__p0_880, __p1_880, __p2_880, __p3_880) __extension__ ({ \ float32x2_t __ret_880; \ float32x2_t __s0_880 = __p0_880; \ float16x4_t __s1_880 = __p1_880; \ float16x4_t __s2_880 = __p2_880; \ - float32x2_t __rev0_880; __rev0_880 = __builtin_shufflevector(__s0_880, __s0_880, 1, 0); \ - float16x4_t __rev1_880; __rev1_880 = __builtin_shufflevector(__s1_880, __s1_880, 3, 2, 1, 0); \ - float16x4_t __rev2_880; __rev2_880 = __builtin_shufflevector(__s2_880, __s2_880, 3, 2, 1, 0); \ - __ret_880 = __noswap_vfmlsl_low_f16(__rev0_880, __rev1_880, (float16x4_t) {__noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880)}); \ - __ret_880 = __builtin_shufflevector(__ret_880, __ret_880, 1, 0); \ + float32x2_t __rev0_880; __rev0_880 = __builtin_shufflevector(__s0_880, __s0_880, __lane_reverse_64_32); \ + float16x4_t __rev1_880; __rev1_880 = __builtin_shufflevector(__s1_880, __s1_880, __lane_reverse_64_16); \ + float16x4_t __rev2_880; __rev2_880 = __builtin_shufflevector(__s2_880, __s2_880, __lane_reverse_64_16); \ + __ret_880 = __noswap_vfmlal_low_f16(__rev0_880, __rev1_880, (float16x4_t) {__noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880)}); \ + __ret_880 = __builtin_shufflevector(__ret_880, __ret_880, __lane_reverse_64_32); \ __ret_880; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_high_f16(__p0_881, __p1_881, __p2_881, __p3_881) __extension__ ({ \ +#define vfmlalq_laneq_high_f16(__p0_881, __p1_881, __p2_881, __p3_881) __extension__ ({ \ float32x4_t __ret_881; \ float32x4_t __s0_881 = __p0_881; \ float16x8_t __s1_881 = __p1_881; \ float16x8_t __s2_881 = __p2_881; \ - __ret_881 = vfmlslq_high_f16(__s0_881, __s1_881, (float16x8_t) {vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881)}); \ + __ret_881 = vfmlalq_high_f16(__s0_881, __s1_881, (float16x8_t) {vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881)}); \ __ret_881; \ 
}) #else -#define vfmlslq_laneq_high_f16(__p0_882, __p1_882, __p2_882, __p3_882) __extension__ ({ \ +#define vfmlalq_laneq_high_f16(__p0_882, __p1_882, __p2_882, __p3_882) __extension__ ({ \ float32x4_t __ret_882; \ float32x4_t __s0_882 = __p0_882; \ float16x8_t __s1_882 = __p1_882; \ float16x8_t __s2_882 = __p2_882; \ - float32x4_t __rev0_882; __rev0_882 = __builtin_shufflevector(__s0_882, __s0_882, 3, 2, 1, 0); \ - float16x8_t __rev1_882; __rev1_882 = __builtin_shufflevector(__s1_882, __s1_882, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_882; __rev2_882 = __builtin_shufflevector(__s2_882, __s2_882, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_882 = __noswap_vfmlslq_high_f16(__rev0_882, __rev1_882, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882)}); \ - __ret_882 = __builtin_shufflevector(__ret_882, __ret_882, 3, 2, 1, 0); \ + float32x4_t __rev0_882; __rev0_882 = __builtin_shufflevector(__s0_882, __s0_882, __lane_reverse_128_32); \ + float16x8_t __rev1_882; __rev1_882 = __builtin_shufflevector(__s1_882, __s1_882, __lane_reverse_128_16); \ + float16x8_t __rev2_882; __rev2_882 = __builtin_shufflevector(__s2_882, __s2_882, __lane_reverse_128_16); \ + __ret_882 = __noswap_vfmlalq_high_f16(__rev0_882, __rev1_882, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882)}); \ + __ret_882 = __builtin_shufflevector(__ret_882, __ret_882, __lane_reverse_128_32); \ __ret_882; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_high_f16(__p0_883, __p1_883, __p2_883, __p3_883) __extension__ ({ \ +#define vfmlal_laneq_high_f16(__p0_883, __p1_883, __p2_883, __p3_883) __extension__ ({ \ float32x2_t __ret_883; \ float32x2_t __s0_883 = __p0_883; \ float16x4_t __s1_883 = __p1_883; \ float16x8_t __s2_883 = __p2_883; \ - __ret_883 = vfmlsl_high_f16(__s0_883, __s1_883, (float16x4_t) {vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883)}); \ + __ret_883 = vfmlal_high_f16(__s0_883, __s1_883, (float16x4_t) {vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883)}); \ __ret_883; \ }) #else -#define vfmlsl_laneq_high_f16(__p0_884, __p1_884, __p2_884, __p3_884) __extension__ ({ \ +#define vfmlal_laneq_high_f16(__p0_884, __p1_884, __p2_884, __p3_884) __extension__ ({ \ float32x2_t __ret_884; \ float32x2_t __s0_884 = __p0_884; \ float16x4_t __s1_884 = __p1_884; \ float16x8_t __s2_884 = __p2_884; \ - float32x2_t __rev0_884; __rev0_884 = __builtin_shufflevector(__s0_884, __s0_884, 1, 0); \ - float16x4_t __rev1_884; __rev1_884 = __builtin_shufflevector(__s1_884, __s1_884, 3, 2, 1, 0); \ - float16x8_t __rev2_884; __rev2_884 = __builtin_shufflevector(__s2_884, __s2_884, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_884 = __noswap_vfmlsl_high_f16(__rev0_884, __rev1_884, (float16x4_t) 
{__noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884)}); \ - __ret_884 = __builtin_shufflevector(__ret_884, __ret_884, 1, 0); \ + float32x2_t __rev0_884; __rev0_884 = __builtin_shufflevector(__s0_884, __s0_884, __lane_reverse_64_32); \ + float16x4_t __rev1_884; __rev1_884 = __builtin_shufflevector(__s1_884, __s1_884, __lane_reverse_64_16); \ + float16x8_t __rev2_884; __rev2_884 = __builtin_shufflevector(__s2_884, __s2_884, __lane_reverse_128_16); \ + __ret_884 = __noswap_vfmlal_high_f16(__rev0_884, __rev1_884, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884)}); \ + __ret_884 = __builtin_shufflevector(__ret_884, __ret_884, __lane_reverse_64_32); \ __ret_884; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_low_f16(__p0_885, __p1_885, __p2_885, __p3_885) __extension__ ({ \ +#define vfmlalq_laneq_low_f16(__p0_885, __p1_885, __p2_885, __p3_885) __extension__ ({ \ float32x4_t __ret_885; \ float32x4_t __s0_885 = __p0_885; \ float16x8_t __s1_885 = __p1_885; \ float16x8_t __s2_885 = __p2_885; \ - __ret_885 = vfmlslq_low_f16(__s0_885, __s1_885, (float16x8_t) {vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885)}); \ + __ret_885 = vfmlalq_low_f16(__s0_885, __s1_885, (float16x8_t) {vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885)}); \ __ret_885; \ }) #else -#define vfmlslq_laneq_low_f16(__p0_886, __p1_886, __p2_886, __p3_886) __extension__ ({ \ +#define vfmlalq_laneq_low_f16(__p0_886, __p1_886, __p2_886, __p3_886) __extension__ ({ \ float32x4_t __ret_886; \ float32x4_t __s0_886 = __p0_886; \ float16x8_t __s1_886 = __p1_886; \ float16x8_t __s2_886 = __p2_886; \ - float32x4_t __rev0_886; __rev0_886 = __builtin_shufflevector(__s0_886, __s0_886, 3, 2, 1, 0); \ - float16x8_t __rev1_886; __rev1_886 = __builtin_shufflevector(__s1_886, __s1_886, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_886; __rev2_886 = __builtin_shufflevector(__s2_886, __s2_886, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_886 = __noswap_vfmlslq_low_f16(__rev0_886, __rev1_886, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886)}); \ - __ret_886 = __builtin_shufflevector(__ret_886, __ret_886, 3, 2, 1, 0); \ + float32x4_t __rev0_886; __rev0_886 = __builtin_shufflevector(__s0_886, __s0_886, __lane_reverse_128_32); \ + float16x8_t __rev1_886; __rev1_886 = __builtin_shufflevector(__s1_886, __s1_886, __lane_reverse_128_16); \ + float16x8_t __rev2_886; __rev2_886 = __builtin_shufflevector(__s2_886, __s2_886, __lane_reverse_128_16); \ + __ret_886 = 
__noswap_vfmlalq_low_f16(__rev0_886, __rev1_886, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886)}); \ + __ret_886 = __builtin_shufflevector(__ret_886, __ret_886, __lane_reverse_128_32); \ __ret_886; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_low_f16(__p0_887, __p1_887, __p2_887, __p3_887) __extension__ ({ \ +#define vfmlal_laneq_low_f16(__p0_887, __p1_887, __p2_887, __p3_887) __extension__ ({ \ float32x2_t __ret_887; \ float32x2_t __s0_887 = __p0_887; \ float16x4_t __s1_887 = __p1_887; \ float16x8_t __s2_887 = __p2_887; \ - __ret_887 = vfmlsl_low_f16(__s0_887, __s1_887, (float16x4_t) {vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887)}); \ + __ret_887 = vfmlal_low_f16(__s0_887, __s1_887, (float16x4_t) {vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887)}); \ __ret_887; \ }) #else -#define vfmlsl_laneq_low_f16(__p0_888, __p1_888, __p2_888, __p3_888) __extension__ ({ \ +#define vfmlal_laneq_low_f16(__p0_888, __p1_888, __p2_888, __p3_888) __extension__ ({ \ float32x2_t __ret_888; \ float32x2_t __s0_888 = __p0_888; \ float16x4_t __s1_888 = __p1_888; \ float16x8_t __s2_888 = __p2_888; \ - float32x2_t __rev0_888; __rev0_888 = __builtin_shufflevector(__s0_888, __s0_888, 1, 0); \ - float16x4_t __rev1_888; __rev1_888 = __builtin_shufflevector(__s1_888, __s1_888, 3, 2, 1, 0); \ - float16x8_t __rev2_888; __rev2_888 = __builtin_shufflevector(__s2_888, __s2_888, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_888 = __noswap_vfmlsl_low_f16(__rev0_888, __rev1_888, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888)}); \ - __ret_888 = __builtin_shufflevector(__ret_888, __ret_888, 1, 0); \ + float32x2_t __rev0_888; __rev0_888 = __builtin_shufflevector(__s0_888, __s0_888, __lane_reverse_64_32); \ + float16x4_t __rev1_888; __rev1_888 = __builtin_shufflevector(__s1_888, __s1_888, __lane_reverse_64_16); \ + float16x8_t __rev2_888; __rev2_888 = __builtin_shufflevector(__s2_888, __s2_888, __lane_reverse_128_16); \ + __ret_888 = __noswap_vfmlal_low_f16(__rev0_888, __rev1_888, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888)}); \ + __ret_888 = __builtin_shufflevector(__ret_888, __ret_888, __lane_reverse_64_32); \ __ret_888; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulh_lane_f16(__p0_889, __p1_889, __p2_889) __extension__ ({ \ - float16_t __ret_889; \ - float16_t __s0_889 = __p0_889; \ - float16x4_t __s1_889 = __p1_889; \ - __ret_889 = __s0_889 * vget_lane_f16(__s1_889, __p2_889); \ +#define vfmlslq_lane_high_f16(__p0_889, __p1_889, __p2_889, __p3_889) __extension__ ({ \ + float32x4_t __ret_889; \ + float32x4_t __s0_889 = __p0_889; \ + float16x8_t __s1_889 = __p1_889; \ + float16x4_t __s2_889 = __p2_889; \ + __ret_889 = vfmlslq_high_f16(__s0_889, __s1_889, (float16x8_t) {vget_lane_f16(__s2_889, 
__p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889)}); \ __ret_889; \ }) #else -#define vmulh_lane_f16(__p0_890, __p1_890, __p2_890) __extension__ ({ \ - float16_t __ret_890; \ - float16_t __s0_890 = __p0_890; \ - float16x4_t __s1_890 = __p1_890; \ - float16x4_t __rev1_890; __rev1_890 = __builtin_shufflevector(__s1_890, __s1_890, 3, 2, 1, 0); \ - __ret_890 = __s0_890 * __noswap_vget_lane_f16(__rev1_890, __p2_890); \ +#define vfmlslq_lane_high_f16(__p0_890, __p1_890, __p2_890, __p3_890) __extension__ ({ \ + float32x4_t __ret_890; \ + float32x4_t __s0_890 = __p0_890; \ + float16x8_t __s1_890 = __p1_890; \ + float16x4_t __s2_890 = __p2_890; \ + float32x4_t __rev0_890; __rev0_890 = __builtin_shufflevector(__s0_890, __s0_890, __lane_reverse_128_32); \ + float16x8_t __rev1_890; __rev1_890 = __builtin_shufflevector(__s1_890, __s1_890, __lane_reverse_128_16); \ + float16x4_t __rev2_890; __rev2_890 = __builtin_shufflevector(__s2_890, __s2_890, __lane_reverse_64_16); \ + __ret_890 = __noswap_vfmlslq_high_f16(__rev0_890, __rev1_890, (float16x8_t) {__noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890)}); \ + __ret_890 = __builtin_shufflevector(__ret_890, __ret_890, __lane_reverse_128_32); \ __ret_890; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulh_laneq_f16(__p0_891, __p1_891, __p2_891) __extension__ ({ \ - float16_t __ret_891; \ - float16_t __s0_891 = __p0_891; \ - float16x8_t __s1_891 = __p1_891; \ - __ret_891 = __s0_891 * vgetq_lane_f16(__s1_891, __p2_891); \ +#define vfmlsl_lane_high_f16(__p0_891, __p1_891, __p2_891, __p3_891) __extension__ ({ \ + float32x2_t __ret_891; \ + float32x2_t __s0_891 = __p0_891; \ + float16x4_t __s1_891 = __p1_891; \ + float16x4_t __s2_891 = __p2_891; \ + __ret_891 = vfmlsl_high_f16(__s0_891, __s1_891, (float16x4_t) {vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891)}); \ __ret_891; \ }) #else -#define vmulh_laneq_f16(__p0_892, __p1_892, __p2_892) __extension__ ({ \ - float16_t __ret_892; \ - float16_t __s0_892 = __p0_892; \ - float16x8_t __s1_892 = __p1_892; \ - float16x8_t __rev1_892; __rev1_892 = __builtin_shufflevector(__s1_892, __s1_892, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_892 = __s0_892 * __noswap_vgetq_lane_f16(__rev1_892, __p2_892); \ +#define vfmlsl_lane_high_f16(__p0_892, __p1_892, __p2_892, __p3_892) __extension__ ({ \ + float32x2_t __ret_892; \ + float32x2_t __s0_892 = __p0_892; \ + float16x4_t __s1_892 = __p1_892; \ + float16x4_t __s2_892 = __p2_892; \ + float32x2_t __rev0_892; __rev0_892 = __builtin_shufflevector(__s0_892, __s0_892, __lane_reverse_64_32); \ + float16x4_t __rev1_892; __rev1_892 = __builtin_shufflevector(__s1_892, __s1_892, __lane_reverse_64_16); \ + float16x4_t __rev2_892; __rev2_892 = __builtin_shufflevector(__s2_892, __s2_892, __lane_reverse_64_16); \ + __ret_892 = __noswap_vfmlsl_high_f16(__rev0_892, __rev1_892, (float16x4_t) {__noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892), 
__noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892)}); \ + __ret_892 = __builtin_shufflevector(__ret_892, __ret_892, __lane_reverse_64_32); \ __ret_892; \ }) #endif +#ifdef __LITTLE_ENDIAN__ +#define vfmlslq_lane_low_f16(__p0_893, __p1_893, __p2_893, __p3_893) __extension__ ({ \ + float32x4_t __ret_893; \ + float32x4_t __s0_893 = __p0_893; \ + float16x8_t __s1_893 = __p1_893; \ + float16x4_t __s2_893 = __p2_893; \ + __ret_893 = vfmlslq_low_f16(__s0_893, __s1_893, (float16x8_t) {vget_lane_f16(__s2_893, __p3_893), vget_lane_f16(__s2_893, __p3_893), vget_lane_f16(__s2_893, __p3_893), vget_lane_f16(__s2_893, __p3_893), vget_lane_f16(__s2_893, __p3_893), vget_lane_f16(__s2_893, __p3_893), vget_lane_f16(__s2_893, __p3_893), vget_lane_f16(__s2_893, __p3_893)}); \ + __ret_893; \ +}) +#else +#define vfmlslq_lane_low_f16(__p0_894, __p1_894, __p2_894, __p3_894) __extension__ ({ \ + float32x4_t __ret_894; \ + float32x4_t __s0_894 = __p0_894; \ + float16x8_t __s1_894 = __p1_894; \ + float16x4_t __s2_894 = __p2_894; \ + float32x4_t __rev0_894; __rev0_894 = __builtin_shufflevector(__s0_894, __s0_894, __lane_reverse_128_32); \ + float16x8_t __rev1_894; __rev1_894 = __builtin_shufflevector(__s1_894, __s1_894, __lane_reverse_128_16); \ + float16x4_t __rev2_894; __rev2_894 = __builtin_shufflevector(__s2_894, __s2_894, __lane_reverse_64_16); \ + __ret_894 = __noswap_vfmlslq_low_f16(__rev0_894, __rev1_894, (float16x8_t) {__noswap_vget_lane_f16(__rev2_894, __p3_894), __noswap_vget_lane_f16(__rev2_894, __p3_894), __noswap_vget_lane_f16(__rev2_894, __p3_894), __noswap_vget_lane_f16(__rev2_894, __p3_894), __noswap_vget_lane_f16(__rev2_894, __p3_894), __noswap_vget_lane_f16(__rev2_894, __p3_894), __noswap_vget_lane_f16(__rev2_894, __p3_894), __noswap_vget_lane_f16(__rev2_894, __p3_894)}); \ + __ret_894 = __builtin_shufflevector(__ret_894, __ret_894, __lane_reverse_128_32); \ + __ret_894; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlsl_lane_low_f16(__p0_895, __p1_895, __p2_895, __p3_895) __extension__ ({ \ + float32x2_t __ret_895; \ + float32x2_t __s0_895 = __p0_895; \ + float16x4_t __s1_895 = __p1_895; \ + float16x4_t __s2_895 = __p2_895; \ + __ret_895 = vfmlsl_low_f16(__s0_895, __s1_895, (float16x4_t) {vget_lane_f16(__s2_895, __p3_895), vget_lane_f16(__s2_895, __p3_895), vget_lane_f16(__s2_895, __p3_895), vget_lane_f16(__s2_895, __p3_895)}); \ + __ret_895; \ +}) +#else +#define vfmlsl_lane_low_f16(__p0_896, __p1_896, __p2_896, __p3_896) __extension__ ({ \ + float32x2_t __ret_896; \ + float32x2_t __s0_896 = __p0_896; \ + float16x4_t __s1_896 = __p1_896; \ + float16x4_t __s2_896 = __p2_896; \ + float32x2_t __rev0_896; __rev0_896 = __builtin_shufflevector(__s0_896, __s0_896, __lane_reverse_64_32); \ + float16x4_t __rev1_896; __rev1_896 = __builtin_shufflevector(__s1_896, __s1_896, __lane_reverse_64_16); \ + float16x4_t __rev2_896; __rev2_896 = __builtin_shufflevector(__s2_896, __s2_896, __lane_reverse_64_16); \ + __ret_896 = __noswap_vfmlsl_low_f16(__rev0_896, __rev1_896, (float16x4_t) {__noswap_vget_lane_f16(__rev2_896, __p3_896), __noswap_vget_lane_f16(__rev2_896, __p3_896), __noswap_vget_lane_f16(__rev2_896, __p3_896), __noswap_vget_lane_f16(__rev2_896, __p3_896)}); \ + __ret_896 = __builtin_shufflevector(__ret_896, __ret_896, __lane_reverse_64_32); \ + __ret_896; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlslq_laneq_high_f16(__p0_897, __p1_897, __p2_897, __p3_897) __extension__ ({ \ + float32x4_t __ret_897; \ + float32x4_t __s0_897 = __p0_897; \ + 
float16x8_t __s1_897 = __p1_897; \ + float16x8_t __s2_897 = __p2_897; \ + __ret_897 = vfmlslq_high_f16(__s0_897, __s1_897, (float16x8_t) {vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897)}); \ + __ret_897; \ +}) +#else +#define vfmlslq_laneq_high_f16(__p0_898, __p1_898, __p2_898, __p3_898) __extension__ ({ \ + float32x4_t __ret_898; \ + float32x4_t __s0_898 = __p0_898; \ + float16x8_t __s1_898 = __p1_898; \ + float16x8_t __s2_898 = __p2_898; \ + float32x4_t __rev0_898; __rev0_898 = __builtin_shufflevector(__s0_898, __s0_898, __lane_reverse_128_32); \ + float16x8_t __rev1_898; __rev1_898 = __builtin_shufflevector(__s1_898, __s1_898, __lane_reverse_128_16); \ + float16x8_t __rev2_898; __rev2_898 = __builtin_shufflevector(__s2_898, __s2_898, __lane_reverse_128_16); \ + __ret_898 = __noswap_vfmlslq_high_f16(__rev0_898, __rev1_898, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898)}); \ + __ret_898 = __builtin_shufflevector(__ret_898, __ret_898, __lane_reverse_128_32); \ + __ret_898; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlsl_laneq_high_f16(__p0_899, __p1_899, __p2_899, __p3_899) __extension__ ({ \ + float32x2_t __ret_899; \ + float32x2_t __s0_899 = __p0_899; \ + float16x4_t __s1_899 = __p1_899; \ + float16x8_t __s2_899 = __p2_899; \ + __ret_899 = vfmlsl_high_f16(__s0_899, __s1_899, (float16x4_t) {vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899)}); \ + __ret_899; \ +}) +#else +#define vfmlsl_laneq_high_f16(__p0_900, __p1_900, __p2_900, __p3_900) __extension__ ({ \ + float32x2_t __ret_900; \ + float32x2_t __s0_900 = __p0_900; \ + float16x4_t __s1_900 = __p1_900; \ + float16x8_t __s2_900 = __p2_900; \ + float32x2_t __rev0_900; __rev0_900 = __builtin_shufflevector(__s0_900, __s0_900, __lane_reverse_64_32); \ + float16x4_t __rev1_900; __rev1_900 = __builtin_shufflevector(__s1_900, __s1_900, __lane_reverse_64_16); \ + float16x8_t __rev2_900; __rev2_900 = __builtin_shufflevector(__s2_900, __s2_900, __lane_reverse_128_16); \ + __ret_900 = __noswap_vfmlsl_high_f16(__rev0_900, __rev1_900, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900)}); \ + __ret_900 = __builtin_shufflevector(__ret_900, __ret_900, __lane_reverse_64_32); \ + __ret_900; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlslq_laneq_low_f16(__p0_901, __p1_901, __p2_901, __p3_901) __extension__ ({ \ + float32x4_t __ret_901; \ + float32x4_t __s0_901 = __p0_901; \ + float16x8_t __s1_901 = __p1_901; \ + float16x8_t __s2_901 = __p2_901; \ + __ret_901 = vfmlslq_low_f16(__s0_901, __s1_901, (float16x8_t) {vgetq_lane_f16(__s2_901, __p3_901), vgetq_lane_f16(__s2_901, __p3_901), vgetq_lane_f16(__s2_901, __p3_901), vgetq_lane_f16(__s2_901, __p3_901), vgetq_lane_f16(__s2_901, __p3_901), vgetq_lane_f16(__s2_901, __p3_901), 
vgetq_lane_f16(__s2_901, __p3_901), vgetq_lane_f16(__s2_901, __p3_901)}); \ + __ret_901; \ +}) +#else +#define vfmlslq_laneq_low_f16(__p0_902, __p1_902, __p2_902, __p3_902) __extension__ ({ \ + float32x4_t __ret_902; \ + float32x4_t __s0_902 = __p0_902; \ + float16x8_t __s1_902 = __p1_902; \ + float16x8_t __s2_902 = __p2_902; \ + float32x4_t __rev0_902; __rev0_902 = __builtin_shufflevector(__s0_902, __s0_902, __lane_reverse_128_32); \ + float16x8_t __rev1_902; __rev1_902 = __builtin_shufflevector(__s1_902, __s1_902, __lane_reverse_128_16); \ + float16x8_t __rev2_902; __rev2_902 = __builtin_shufflevector(__s2_902, __s2_902, __lane_reverse_128_16); \ + __ret_902 = __noswap_vfmlslq_low_f16(__rev0_902, __rev1_902, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_902, __p3_902), __noswap_vgetq_lane_f16(__rev2_902, __p3_902), __noswap_vgetq_lane_f16(__rev2_902, __p3_902), __noswap_vgetq_lane_f16(__rev2_902, __p3_902), __noswap_vgetq_lane_f16(__rev2_902, __p3_902), __noswap_vgetq_lane_f16(__rev2_902, __p3_902), __noswap_vgetq_lane_f16(__rev2_902, __p3_902), __noswap_vgetq_lane_f16(__rev2_902, __p3_902)}); \ + __ret_902 = __builtin_shufflevector(__ret_902, __ret_902, __lane_reverse_128_32); \ + __ret_902; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vfmlsl_laneq_low_f16(__p0_903, __p1_903, __p2_903, __p3_903) __extension__ ({ \ + float32x2_t __ret_903; \ + float32x2_t __s0_903 = __p0_903; \ + float16x4_t __s1_903 = __p1_903; \ + float16x8_t __s2_903 = __p2_903; \ + __ret_903 = vfmlsl_low_f16(__s0_903, __s1_903, (float16x4_t) {vgetq_lane_f16(__s2_903, __p3_903), vgetq_lane_f16(__s2_903, __p3_903), vgetq_lane_f16(__s2_903, __p3_903), vgetq_lane_f16(__s2_903, __p3_903)}); \ + __ret_903; \ +}) +#else +#define vfmlsl_laneq_low_f16(__p0_904, __p1_904, __p2_904, __p3_904) __extension__ ({ \ + float32x2_t __ret_904; \ + float32x2_t __s0_904 = __p0_904; \ + float16x4_t __s1_904 = __p1_904; \ + float16x8_t __s2_904 = __p2_904; \ + float32x2_t __rev0_904; __rev0_904 = __builtin_shufflevector(__s0_904, __s0_904, __lane_reverse_64_32); \ + float16x4_t __rev1_904; __rev1_904 = __builtin_shufflevector(__s1_904, __s1_904, __lane_reverse_64_16); \ + float16x8_t __rev2_904; __rev2_904 = __builtin_shufflevector(__s2_904, __s2_904, __lane_reverse_128_16); \ + __ret_904 = __noswap_vfmlsl_low_f16(__rev0_904, __rev1_904, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_904, __p3_904), __noswap_vgetq_lane_f16(__rev2_904, __p3_904), __noswap_vgetq_lane_f16(__rev2_904, __p3_904), __noswap_vgetq_lane_f16(__rev2_904, __p3_904)}); \ + __ret_904 = __builtin_shufflevector(__ret_904, __ret_904, __lane_reverse_64_32); \ + __ret_904; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulh_lane_f16(__p0_905, __p1_905, __p2_905) __extension__ ({ \ + float16_t __ret_905; \ + float16_t __s0_905 = __p0_905; \ + float16x4_t __s1_905 = __p1_905; \ + __ret_905 = __s0_905 * vget_lane_f16(__s1_905, __p2_905); \ + __ret_905; \ +}) +#else +#define vmulh_lane_f16(__p0_906, __p1_906, __p2_906) __extension__ ({ \ + float16_t __ret_906; \ + float16_t __s0_906 = __p0_906; \ + float16x4_t __s1_906 = __p1_906; \ + float16x4_t __rev1_906; __rev1_906 = __builtin_shufflevector(__s1_906, __s1_906, __lane_reverse_64_16); \ + __ret_906 = __s0_906 * __noswap_vget_lane_f16(__rev1_906, __p2_906); \ + __ret_906; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmulh_laneq_f16(__p0_907, __p1_907, __p2_907) __extension__ ({ \ + float16_t __ret_907; \ + float16_t __s0_907 = __p0_907; \ + float16x8_t __s1_907 = __p1_907; \ + __ret_907 = __s0_907 * 
vgetq_lane_f16(__s1_907, __p2_907); \ + __ret_907; \ +}) +#else +#define vmulh_laneq_f16(__p0_908, __p1_908, __p2_908) __extension__ ({ \ + float16_t __ret_908; \ + float16_t __s0_908 = __p0_908; \ + float16x8_t __s1_908 = __p1_908; \ + float16x8_t __rev1_908; __rev1_908 = __builtin_shufflevector(__s1_908, __s1_908, __lane_reverse_128_16); \ + __ret_908 = __s0_908 * __noswap_vgetq_lane_f16(__rev1_908, __p2_908); \ + __ret_908; \ +}) +#endif + #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("neon"))) uint16x8_t vabdl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; @@ -70455,10 +72973,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vabdl_high_u8(uint8x16_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vabdl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vabdl_u8(__noswap_vget_high_u8(__rev0), __noswap_vget_high_u8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70472,10 +72990,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vabdl_high_u32(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vabdl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vabdl_u32(__noswap_vget_high_u32(__rev0), __noswap_vget_high_u32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70489,10 +73007,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vabdl_high_u16(uint16x8_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vabdl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vabdl_u16(__noswap_vget_high_u16(__rev0), __noswap_vget_high_u16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -70506,10 +73024,10 @@ __ai __attribute__((target("neon"))) int16x8_t vabdl_high_s8(int8x16_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vabdl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vabdl_s8(__noswap_vget_high_s8(__rev0), __noswap_vget_high_s8(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70523,10 +73041,10 @@ __ai __attribute__((target("neon"))) int64x2_t vabdl_high_s32(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vabdl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vabdl_s32(__noswap_vget_high_s32(__rev0), __noswap_vget_high_s32(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70540,10 +73058,10 @@ __ai __attribute__((target("neon"))) int32x4_t vabdl_high_s16(int16x8_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vabdl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vabdl_s16(__noswap_vget_high_s16(__rev0), __noswap_vget_high_s16(__rev1)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -70557,10 +73075,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vaddl_high_u8(uint8x16_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vaddl_high_u8(uint8x16_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vmovl_high_u8(__rev0) + __noswap_vmovl_high_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70574,10 +73092,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vaddl_high_u32(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vaddl_high_u32(uint32x4_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 
__lane_reverse_128_32); __ret = __noswap_vmovl_high_u32(__rev0) + __noswap_vmovl_high_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70591,10 +73109,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vaddl_high_u16(uint16x8_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vaddl_high_u16(uint16x8_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmovl_high_u16(__rev0) + __noswap_vmovl_high_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -70608,10 +73126,10 @@ __ai __attribute__((target("neon"))) int16x8_t vaddl_high_s8(int8x16_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vaddl_high_s8(int8x16_t __p0, int8x16_t __p1) { int16x8_t __ret; - int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_8); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __noswap_vmovl_high_s8(__rev0) + __noswap_vmovl_high_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70625,10 +73143,10 @@ __ai __attribute__((target("neon"))) int64x2_t vaddl_high_s32(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vaddl_high_s32(int32x4_t __p0, int32x4_t __p1) { int64x2_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmovl_high_s32(__rev0) + __noswap_vmovl_high_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70642,10 +73160,10 @@ __ai __attribute__((target("neon"))) int32x4_t vaddl_high_s16(int16x8_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vaddl_high_s16(int16x8_t __p0, int16x8_t __p1) { int32x4_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmovl_high_s16(__rev0) + __noswap_vmovl_high_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif 
@@ -70659,10 +73177,10 @@ __ai __attribute__((target("neon"))) uint16x8_t vaddw_high_u8(uint16x8_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vaddw_high_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 + __noswap_vmovl_high_u8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70676,10 +73194,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vaddw_high_u32(uint64x2_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vaddw_high_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __noswap_vmovl_high_u32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70693,10 +73211,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vaddw_high_u16(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vaddw_high_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 + __noswap_vmovl_high_u16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -70710,10 +73228,10 @@ __ai __attribute__((target("neon"))) int16x8_t vaddw_high_s8(int16x8_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vaddw_high_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); __ret = __rev0 + __noswap_vmovl_high_s8(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70727,10 +73245,10 @@ __ai __attribute__((target("neon"))) int64x2_t vaddw_high_s32(int64x2_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vaddw_high_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = 
__builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __rev0 + __noswap_vmovl_high_s32(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70744,145 +73262,145 @@ __ai __attribute__((target("neon"))) int32x4_t vaddw_high_s16(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vaddw_high_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __rev0 + __noswap_vmovl_high_s16(__rev1); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p64(__p0_893, __p1_893, __p2_893, __p3_893) __extension__ ({ \ - poly64x2_t __ret_893; \ - poly64x2_t __s0_893 = __p0_893; \ - poly64x1_t __s2_893 = __p2_893; \ - __ret_893 = vsetq_lane_p64(vget_lane_p64(__s2_893, __p3_893), __s0_893, __p1_893); \ - __ret_893; \ +#define vcopyq_lane_p64(__p0_909, __p1_909, __p2_909, __p3_909) __extension__ ({ \ + poly64x2_t __ret_909; \ + poly64x2_t __s0_909 = __p0_909; \ + poly64x1_t __s2_909 = __p2_909; \ + __ret_909 = vsetq_lane_p64(vget_lane_p64(__s2_909, __p3_909), __s0_909, __p1_909); \ + __ret_909; \ }) #else -#define vcopyq_lane_p64(__p0_894, __p1_894, __p2_894, __p3_894) __extension__ ({ \ - poly64x2_t __ret_894; \ - poly64x2_t __s0_894 = __p0_894; \ - poly64x1_t __s2_894 = __p2_894; \ - poly64x2_t __rev0_894; __rev0_894 = __builtin_shufflevector(__s0_894, __s0_894, 1, 0); \ - __ret_894 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_894, __p3_894), __rev0_894, __p1_894); \ - __ret_894 = __builtin_shufflevector(__ret_894, __ret_894, 1, 0); \ - __ret_894; \ +#define vcopyq_lane_p64(__p0_910, __p1_910, __p2_910, __p3_910) __extension__ ({ \ + poly64x2_t __ret_910; \ + poly64x2_t __s0_910 = __p0_910; \ + poly64x1_t __s2_910 = __p2_910; \ + poly64x2_t __rev0_910; __rev0_910 = __builtin_shufflevector(__s0_910, __s0_910, __lane_reverse_128_64); \ + __ret_910 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_910, __p3_910), __rev0_910, __p1_910); \ + __ret_910 = __builtin_shufflevector(__ret_910, __ret_910, __lane_reverse_128_64); \ + __ret_910; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_f64(__p0_895, __p1_895, __p2_895, __p3_895) __extension__ ({ \ - float64x2_t __ret_895; \ - float64x2_t __s0_895 = __p0_895; \ - float64x1_t __s2_895 = __p2_895; \ - __ret_895 = vsetq_lane_f64(vget_lane_f64(__s2_895, __p3_895), __s0_895, __p1_895); \ - __ret_895; \ +#define vcopyq_lane_f64(__p0_911, __p1_911, __p2_911, __p3_911) __extension__ ({ \ + float64x2_t __ret_911; \ + float64x2_t __s0_911 = __p0_911; \ + float64x1_t __s2_911 = __p2_911; \ + __ret_911 = vsetq_lane_f64(vget_lane_f64(__s2_911, __p3_911), __s0_911, __p1_911); \ + __ret_911; \ }) #else -#define vcopyq_lane_f64(__p0_896, __p1_896, __p2_896, __p3_896) __extension__ ({ \ - float64x2_t __ret_896; \ - float64x2_t __s0_896 = __p0_896; \ - float64x1_t __s2_896 = __p2_896; 
\ - float64x2_t __rev0_896; __rev0_896 = __builtin_shufflevector(__s0_896, __s0_896, 1, 0); \ - __ret_896 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_896, __p3_896), __rev0_896, __p1_896); \ - __ret_896 = __builtin_shufflevector(__ret_896, __ret_896, 1, 0); \ - __ret_896; \ +#define vcopyq_lane_f64(__p0_912, __p1_912, __p2_912, __p3_912) __extension__ ({ \ + float64x2_t __ret_912; \ + float64x2_t __s0_912 = __p0_912; \ + float64x1_t __s2_912 = __p2_912; \ + float64x2_t __rev0_912; __rev0_912 = __builtin_shufflevector(__s0_912, __s0_912, __lane_reverse_128_64); \ + __ret_912 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_912, __p3_912), __rev0_912, __p1_912); \ + __ret_912 = __builtin_shufflevector(__ret_912, __ret_912, __lane_reverse_128_64); \ + __ret_912; \ }) #endif -#define vcopy_lane_p64(__p0_897, __p1_897, __p2_897, __p3_897) __extension__ ({ \ - poly64x1_t __ret_897; \ - poly64x1_t __s0_897 = __p0_897; \ - poly64x1_t __s2_897 = __p2_897; \ - __ret_897 = vset_lane_p64(vget_lane_p64(__s2_897, __p3_897), __s0_897, __p1_897); \ - __ret_897; \ +#define vcopy_lane_p64(__p0_913, __p1_913, __p2_913, __p3_913) __extension__ ({ \ + poly64x1_t __ret_913; \ + poly64x1_t __s0_913 = __p0_913; \ + poly64x1_t __s2_913 = __p2_913; \ + __ret_913 = vset_lane_p64(vget_lane_p64(__s2_913, __p3_913), __s0_913, __p1_913); \ + __ret_913; \ }) -#define vcopy_lane_f64(__p0_898, __p1_898, __p2_898, __p3_898) __extension__ ({ \ - float64x1_t __ret_898; \ - float64x1_t __s0_898 = __p0_898; \ - float64x1_t __s2_898 = __p2_898; \ - __ret_898 = vset_lane_f64(vget_lane_f64(__s2_898, __p3_898), __s0_898, __p1_898); \ - __ret_898; \ +#define vcopy_lane_f64(__p0_914, __p1_914, __p2_914, __p3_914) __extension__ ({ \ + float64x1_t __ret_914; \ + float64x1_t __s0_914 = __p0_914; \ + float64x1_t __s2_914 = __p2_914; \ + __ret_914 = vset_lane_f64(vget_lane_f64(__s2_914, __p3_914), __s0_914, __p1_914); \ + __ret_914; \ }) #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p64(__p0_899, __p1_899, __p2_899, __p3_899) __extension__ ({ \ - poly64x2_t __ret_899; \ - poly64x2_t __s0_899 = __p0_899; \ - poly64x2_t __s2_899 = __p2_899; \ - __ret_899 = vsetq_lane_p64(vgetq_lane_p64(__s2_899, __p3_899), __s0_899, __p1_899); \ - __ret_899; \ +#define vcopyq_laneq_p64(__p0_915, __p1_915, __p2_915, __p3_915) __extension__ ({ \ + poly64x2_t __ret_915; \ + poly64x2_t __s0_915 = __p0_915; \ + poly64x2_t __s2_915 = __p2_915; \ + __ret_915 = vsetq_lane_p64(vgetq_lane_p64(__s2_915, __p3_915), __s0_915, __p1_915); \ + __ret_915; \ }) #else -#define vcopyq_laneq_p64(__p0_900, __p1_900, __p2_900, __p3_900) __extension__ ({ \ - poly64x2_t __ret_900; \ - poly64x2_t __s0_900 = __p0_900; \ - poly64x2_t __s2_900 = __p2_900; \ - poly64x2_t __rev0_900; __rev0_900 = __builtin_shufflevector(__s0_900, __s0_900, 1, 0); \ - poly64x2_t __rev2_900; __rev2_900 = __builtin_shufflevector(__s2_900, __s2_900, 1, 0); \ - __ret_900 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_900, __p3_900), __rev0_900, __p1_900); \ - __ret_900 = __builtin_shufflevector(__ret_900, __ret_900, 1, 0); \ - __ret_900; \ +#define vcopyq_laneq_p64(__p0_916, __p1_916, __p2_916, __p3_916) __extension__ ({ \ + poly64x2_t __ret_916; \ + poly64x2_t __s0_916 = __p0_916; \ + poly64x2_t __s2_916 = __p2_916; \ + poly64x2_t __rev0_916; __rev0_916 = __builtin_shufflevector(__s0_916, __s0_916, __lane_reverse_128_64); \ + poly64x2_t __rev2_916; __rev2_916 = __builtin_shufflevector(__s2_916, __s2_916, __lane_reverse_128_64); \ + __ret_916 = 
__noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_916, __p3_916), __rev0_916, __p1_916); \ + __ret_916 = __builtin_shufflevector(__ret_916, __ret_916, __lane_reverse_128_64); \ + __ret_916; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f64(__p0_901, __p1_901, __p2_901, __p3_901) __extension__ ({ \ - float64x2_t __ret_901; \ - float64x2_t __s0_901 = __p0_901; \ - float64x2_t __s2_901 = __p2_901; \ - __ret_901 = vsetq_lane_f64(vgetq_lane_f64(__s2_901, __p3_901), __s0_901, __p1_901); \ - __ret_901; \ +#define vcopyq_laneq_f64(__p0_917, __p1_917, __p2_917, __p3_917) __extension__ ({ \ + float64x2_t __ret_917; \ + float64x2_t __s0_917 = __p0_917; \ + float64x2_t __s2_917 = __p2_917; \ + __ret_917 = vsetq_lane_f64(vgetq_lane_f64(__s2_917, __p3_917), __s0_917, __p1_917); \ + __ret_917; \ }) #else -#define vcopyq_laneq_f64(__p0_902, __p1_902, __p2_902, __p3_902) __extension__ ({ \ - float64x2_t __ret_902; \ - float64x2_t __s0_902 = __p0_902; \ - float64x2_t __s2_902 = __p2_902; \ - float64x2_t __rev0_902; __rev0_902 = __builtin_shufflevector(__s0_902, __s0_902, 1, 0); \ - float64x2_t __rev2_902; __rev2_902 = __builtin_shufflevector(__s2_902, __s2_902, 1, 0); \ - __ret_902 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_902, __p3_902), __rev0_902, __p1_902); \ - __ret_902 = __builtin_shufflevector(__ret_902, __ret_902, 1, 0); \ - __ret_902; \ +#define vcopyq_laneq_f64(__p0_918, __p1_918, __p2_918, __p3_918) __extension__ ({ \ + float64x2_t __ret_918; \ + float64x2_t __s0_918 = __p0_918; \ + float64x2_t __s2_918 = __p2_918; \ + float64x2_t __rev0_918; __rev0_918 = __builtin_shufflevector(__s0_918, __s0_918, __lane_reverse_128_64); \ + float64x2_t __rev2_918; __rev2_918 = __builtin_shufflevector(__s2_918, __s2_918, __lane_reverse_128_64); \ + __ret_918 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_918, __p3_918), __rev0_918, __p1_918); \ + __ret_918 = __builtin_shufflevector(__ret_918, __ret_918, __lane_reverse_128_64); \ + __ret_918; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p64(__p0_903, __p1_903, __p2_903, __p3_903) __extension__ ({ \ - poly64x1_t __ret_903; \ - poly64x1_t __s0_903 = __p0_903; \ - poly64x2_t __s2_903 = __p2_903; \ - __ret_903 = vset_lane_p64(vgetq_lane_p64(__s2_903, __p3_903), __s0_903, __p1_903); \ - __ret_903; \ +#define vcopy_laneq_p64(__p0_919, __p1_919, __p2_919, __p3_919) __extension__ ({ \ + poly64x1_t __ret_919; \ + poly64x1_t __s0_919 = __p0_919; \ + poly64x2_t __s2_919 = __p2_919; \ + __ret_919 = vset_lane_p64(vgetq_lane_p64(__s2_919, __p3_919), __s0_919, __p1_919); \ + __ret_919; \ }) #else -#define vcopy_laneq_p64(__p0_904, __p1_904, __p2_904, __p3_904) __extension__ ({ \ - poly64x1_t __ret_904; \ - poly64x1_t __s0_904 = __p0_904; \ - poly64x2_t __s2_904 = __p2_904; \ - poly64x2_t __rev2_904; __rev2_904 = __builtin_shufflevector(__s2_904, __s2_904, 1, 0); \ - __ret_904 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_904, __p3_904), __s0_904, __p1_904); \ - __ret_904; \ +#define vcopy_laneq_p64(__p0_920, __p1_920, __p2_920, __p3_920) __extension__ ({ \ + poly64x1_t __ret_920; \ + poly64x1_t __s0_920 = __p0_920; \ + poly64x2_t __s2_920 = __p2_920; \ + poly64x2_t __rev2_920; __rev2_920 = __builtin_shufflevector(__s2_920, __s2_920, __lane_reverse_128_64); \ + __ret_920 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_920, __p3_920), __s0_920, __p1_920); \ + __ret_920; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f64(__p0_905, __p1_905, __p2_905, __p3_905) __extension__ ({ \ - float64x1_t __ret_905; \ - 
float64x1_t __s0_905 = __p0_905; \ - float64x2_t __s2_905 = __p2_905; \ - __ret_905 = vset_lane_f64(vgetq_lane_f64(__s2_905, __p3_905), __s0_905, __p1_905); \ - __ret_905; \ +#define vcopy_laneq_f64(__p0_921, __p1_921, __p2_921, __p3_921) __extension__ ({ \ + float64x1_t __ret_921; \ + float64x1_t __s0_921 = __p0_921; \ + float64x2_t __s2_921 = __p2_921; \ + __ret_921 = vset_lane_f64(vgetq_lane_f64(__s2_921, __p3_921), __s0_921, __p1_921); \ + __ret_921; \ }) #else -#define vcopy_laneq_f64(__p0_906, __p1_906, __p2_906, __p3_906) __extension__ ({ \ - float64x1_t __ret_906; \ - float64x1_t __s0_906 = __p0_906; \ - float64x2_t __s2_906 = __p2_906; \ - float64x2_t __rev2_906; __rev2_906 = __builtin_shufflevector(__s2_906, __s2_906, 1, 0); \ - __ret_906 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_906, __p3_906), __s0_906, __p1_906); \ - __ret_906; \ +#define vcopy_laneq_f64(__p0_922, __p1_922, __p2_922, __p3_922) __extension__ ({ \ + float64x1_t __ret_922; \ + float64x1_t __s0_922 = __p0_922; \ + float64x2_t __s2_922 = __p2_922; \ + float64x2_t __rev2_922; __rev2_922 = __builtin_shufflevector(__s2_922, __s2_922, __lane_reverse_128_64); \ + __ret_922 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_922, __p3_922), __s0_922, __p1_922); \ + __ret_922; \ }) #endif @@ -70895,11 +73413,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlal_high_u8(uint16x8_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vmlal_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __noswap_vmlal_u8(__rev0, __noswap_vget_high_u8(__rev1), __noswap_vget_high_u8(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70913,11 +73431,11 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlal_high_u32(uint64x2_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vmlal_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vmlal_u32(__rev0, __noswap_vget_high_u32(__rev1), __noswap_vget_high_u32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70931,11 +73449,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlal_high_u16(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t 
vmlal_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vmlal_u16(__rev0, __noswap_vget_high_u16(__rev1), __noswap_vget_high_u16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -70949,11 +73467,11 @@ __ai __attribute__((target("neon"))) int16x8_t vmlal_high_s8(int16x8_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vmlal_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __noswap_vmlal_s8(__rev0, __noswap_vget_high_s8(__rev1), __noswap_vget_high_s8(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -70967,11 +73485,11 @@ __ai __attribute__((target("neon"))) int64x2_t vmlal_high_s32(int64x2_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vmlal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vmlal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -70985,11 +73503,11 @@ __ai __attribute__((target("neon"))) int32x4_t vmlal_high_s16(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + 
int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vmlal_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -71003,10 +73521,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlal_high_n_u32(uint64x2_t __p0 #else __ai __attribute__((target("neon"))) uint64x2_t vmlal_high_n_u32(uint64x2_t __p0, uint32x4_t __p1, uint32_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmlal_n_u32(__rev0, __noswap_vget_high_u32(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71020,10 +73538,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlal_high_n_u16(uint32x4_t __p0 #else __ai __attribute__((target("neon"))) uint32x4_t vmlal_high_n_u16(uint32x4_t __p0, uint16x8_t __p1, uint16_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmlal_n_u16(__rev0, __noswap_vget_high_u16(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -71037,10 +73555,10 @@ __ai __attribute__((target("neon"))) int64x2_t vmlal_high_n_s32(int64x2_t __p0, #else __ai __attribute__((target("neon"))) int64x2_t vmlal_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmlal_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71054,10 +73572,10 @@ __ai __attribute__((target("neon"))) int32x4_t vmlal_high_n_s16(int32x4_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmlal_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); - __ret = 
__builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -71071,11 +73589,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vmlsl_high_u8(uint16x8_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vmlsl_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __noswap_vmlsl_u8(__rev0, __noswap_vget_high_u8(__rev1), __noswap_vget_high_u8(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -71089,11 +73607,11 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlsl_high_u32(uint64x2_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vmlsl_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vmlsl_u32(__rev0, __noswap_vget_high_u32(__rev1), __noswap_vget_high_u32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71107,11 +73625,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlsl_high_u16(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vmlsl_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vmlsl_u16(__rev0, __noswap_vget_high_u16(__rev1), __noswap_vget_high_u16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -71125,11 +73643,11 @@ __ai __attribute__((target("neon"))) int16x8_t vmlsl_high_s8(int16x8_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vmlsl_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; - int16x8_t __rev0; 
__rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __noswap_vmlsl_s8(__rev0, __noswap_vget_high_s8(__rev1), __noswap_vget_high_s8(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -71143,11 +73661,11 @@ __ai __attribute__((target("neon"))) int64x2_t vmlsl_high_s32(int64x2_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vmlsl_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vmlsl_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71161,11 +73679,11 @@ __ai __attribute__((target("neon"))) int32x4_t vmlsl_high_s16(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vmlsl_s16(__rev0, __noswap_vget_high_s16(__rev1), __noswap_vget_high_s16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -71179,10 +73697,10 @@ __ai __attribute__((target("neon"))) uint64x2_t vmlsl_high_n_u32(uint64x2_t __p0 #else __ai __attribute__((target("neon"))) uint64x2_t vmlsl_high_n_u32(uint64x2_t __p0, uint32x4_t __p1, uint32_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmlsl_n_u32(__rev0, __noswap_vget_high_u32(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 
1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71196,10 +73714,10 @@ __ai __attribute__((target("neon"))) uint32x4_t vmlsl_high_n_u16(uint32x4_t __p0 #else __ai __attribute__((target("neon"))) uint32x4_t vmlsl_high_n_u16(uint32x4_t __p0, uint16x8_t __p1, uint16_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmlsl_n_u16(__rev0, __noswap_vget_high_u16(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -71213,10 +73731,10 @@ __ai __attribute__((target("neon"))) int64x2_t vmlsl_high_n_s32(int64x2_t __p0, #else __ai __attribute__((target("neon"))) int64x2_t vmlsl_high_n_s32(int64x2_t __p0, int32x4_t __p1, int32_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); __ret = __noswap_vmlsl_n_s32(__rev0, __noswap_vget_high_s32(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71230,46 +73748,46 @@ __ai __attribute__((target("neon"))) int32x4_t vmlsl_high_n_s16(int32x4_t __p0, #else __ai __attribute__((target("neon"))) int32x4_t vmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); __ret = __noswap_vmlsl_n_s16(__rev0, __noswap_vget_high_s16(__rev1), __p2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif -#define vmulx_lane_f64(__p0_907, __p1_907, __p2_907) __extension__ ({ \ - float64x1_t __ret_907; \ - float64x1_t __s0_907 = __p0_907; \ - float64x1_t __s1_907 = __p1_907; \ - float64_t __x_907 = vget_lane_f64(__s0_907, 0); \ - float64_t __y_907 = vget_lane_f64(__s1_907, __p2_907); \ - float64_t __z_907 = vmulxd_f64(__x_907, __y_907); \ - __ret_907 = vset_lane_f64(__z_907, __s0_907, __p2_907); \ - __ret_907; \ +#define vmulx_lane_f64(__p0_923, __p1_923, __p2_923) __extension__ ({ \ + float64x1_t __ret_923; \ + float64x1_t __s0_923 = __p0_923; \ + float64x1_t __s1_923 = __p1_923; \ + float64_t __x_923 = vget_lane_f64(__s0_923, 0); \ + float64_t __y_923 = vget_lane_f64(__s1_923, __p2_923); \ + float64_t __z_923 = vmulxd_f64(__x_923, __y_923); \ + __ret_923 = vset_lane_f64(__z_923, __s0_923, __p2_923); \ + __ret_923; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f64(__p0_908, __p1_908, __p2_908) __extension__ ({ \ - float64x1_t __ret_908; \ - float64x1_t __s0_908 = __p0_908; \ - float64x2_t 
__s1_908 = __p1_908; \ - float64_t __x_908 = vget_lane_f64(__s0_908, 0); \ - float64_t __y_908 = vgetq_lane_f64(__s1_908, __p2_908); \ - float64_t __z_908 = vmulxd_f64(__x_908, __y_908); \ - __ret_908 = vset_lane_f64(__z_908, __s0_908, 0); \ - __ret_908; \ +#define vmulx_laneq_f64(__p0_924, __p1_924, __p2_924) __extension__ ({ \ + float64x1_t __ret_924; \ + float64x1_t __s0_924 = __p0_924; \ + float64x2_t __s1_924 = __p1_924; \ + float64_t __x_924 = vget_lane_f64(__s0_924, 0); \ + float64_t __y_924 = vgetq_lane_f64(__s1_924, __p2_924); \ + float64_t __z_924 = vmulxd_f64(__x_924, __y_924); \ + __ret_924 = vset_lane_f64(__z_924, __s0_924, 0); \ + __ret_924; \ }) #else -#define vmulx_laneq_f64(__p0_909, __p1_909, __p2_909) __extension__ ({ \ - float64x1_t __ret_909; \ - float64x1_t __s0_909 = __p0_909; \ - float64x2_t __s1_909 = __p1_909; \ - float64x2_t __rev1_909; __rev1_909 = __builtin_shufflevector(__s1_909, __s1_909, 1, 0); \ - float64_t __x_909 = vget_lane_f64(__s0_909, 0); \ - float64_t __y_909 = __noswap_vgetq_lane_f64(__rev1_909, __p2_909); \ - float64_t __z_909 = vmulxd_f64(__x_909, __y_909); \ - __ret_909 = vset_lane_f64(__z_909, __s0_909, 0); \ - __ret_909; \ +#define vmulx_laneq_f64(__p0_925, __p1_925, __p2_925) __extension__ ({ \ + float64x1_t __ret_925; \ + float64x1_t __s0_925 = __p0_925; \ + float64x2_t __s1_925 = __p1_925; \ + float64x2_t __rev1_925; __rev1_925 = __builtin_shufflevector(__s1_925, __s1_925, __lane_reverse_128_64); \ + float64_t __x_925 = vget_lane_f64(__s0_925, 0); \ + float64_t __y_925 = __noswap_vgetq_lane_f64(__rev1_925, __p2_925); \ + float64_t __z_925 = vmulxd_f64(__x_925, __y_925); \ + __ret_925 = vset_lane_f64(__z_925, __s0_925, 0); \ + __ret_925; \ }) #endif @@ -71283,11 +73801,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vabal_u8(uint16x8_t __p0, uint8x #else __ai __attribute__((target("neon"))) uint16x8_t vabal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __noswap_vabdl_u8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) uint16x8_t __noswap_vabal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { @@ -71306,11 +73824,11 @@ __ai __attribute__((target("neon"))) uint64x2_t vabal_u32(uint64x2_t __p0, uint3 #else __ai __attribute__((target("neon"))) uint64x2_t vabal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + uint32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 
__lane_reverse_64_32); __ret = __rev0 + __noswap_vabdl_u32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) uint64x2_t __noswap_vabal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { @@ -71329,11 +73847,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vabal_u16(uint32x4_t __p0, uint1 #else __ai __attribute__((target("neon"))) uint32x4_t vabal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + uint16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + __noswap_vabdl_u16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) uint32x4_t __noswap_vabal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { @@ -71352,11 +73870,11 @@ __ai __attribute__((target("neon"))) int16x8_t vabal_s8(int16x8_t __p0, int8x8_t #else __ai __attribute__((target("neon"))) int16x8_t vabal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_8); + int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_8); __ret = __rev0 + __noswap_vabdl_s8(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } __ai __attribute__((target("neon"))) int16x8_t __noswap_vabal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { @@ -71375,11 +73893,11 @@ __ai __attribute__((target("neon"))) int64x2_t vabal_s32(int64x2_t __p0, int32x2 #else __ai __attribute__((target("neon"))) int64x2_t vabal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_32); + int32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_32); __ret = __rev0 + __noswap_vabdl_s32(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } __ai __attribute__((target("neon"))) int64x2_t __noswap_vabal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { @@ -71398,11 +73916,11 @@ 
__ai __attribute__((target("neon"))) int32x4_t vabal_s16(int32x4_t __p0, int16x4 #else __ai __attribute__((target("neon"))) int32x4_t vabal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_64_16); + int16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_64_16); __ret = __rev0 + __noswap_vabdl_s16(__rev1, __rev2); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } __ai __attribute__((target("neon"))) int32x4_t __noswap_vabal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { @@ -71422,11 +73940,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vabal_high_u8(uint16x8_t __p0, u #else __ai __attribute__((target("neon"))) uint16x8_t vabal_high_u8(uint16x8_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint16x8_t __ret; - uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __noswap_vabal_u8(__rev0, __noswap_vget_high_u8(__rev1), __noswap_vget_high_u8(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -71440,11 +73958,11 @@ __ai __attribute__((target("neon"))) uint64x2_t vabal_high_u32(uint64x2_t __p0, #else __ai __attribute__((target("neon"))) uint64x2_t vabal_high_u32(uint64x2_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint64x2_t __ret; - uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + uint32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vabal_u32(__rev0, __noswap_vget_high_u32(__rev1), __noswap_vget_high_u32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71458,11 +73976,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vabal_high_u16(uint32x4_t __p0, #else __ai __attribute__((target("neon"))) uint32x4_t vabal_high_u16(uint32x4_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint32x4_t __ret; - uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - 
uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + uint16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vabal_u16(__rev0, __noswap_vget_high_u16(__rev1), __noswap_vget_high_u16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif @@ -71476,11 +73994,11 @@ __ai __attribute__((target("neon"))) int16x8_t vabal_high_s8(int16x8_t __p0, int #else __ai __attribute__((target("neon"))) int16x8_t vabal_high_s8(int16x8_t __p0, int8x16_t __p1, int8x16_t __p2) { int16x8_t __ret; - int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_16); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_8); + int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_8); __ret = __noswap_vabal_s8(__rev0, __noswap_vget_high_s8(__rev1), __noswap_vget_high_s8(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_16); return __ret; } #endif @@ -71494,11 +74012,11 @@ __ai __attribute__((target("neon"))) int64x2_t vabal_high_s32(int64x2_t __p0, in #else __ai __attribute__((target("neon"))) int64x2_t vabal_high_s32(int64x2_t __p0, int32x4_t __p1, int32x4_t __p2) { int64x2_t __ret; - int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); - int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_64); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_32); + int32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_32); __ret = __noswap_vabal_s32(__rev0, __noswap_vget_high_s32(__rev1), __noswap_vget_high_s32(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_64); return __ret; } #endif @@ -71512,11 +74030,11 @@ __ai __attribute__((target("neon"))) int32x4_t vabal_high_s16(int32x4_t __p0, in #else __ai __attribute__((target("neon"))) int32x4_t vabal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) { int32x4_t __ret; - int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_128_32); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, __lane_reverse_128_16); + int16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, __lane_reverse_128_16); __ret = __noswap_vabal_s16(__rev0, __noswap_vget_high_s16(__rev1), 
__noswap_vget_high_s16(__rev2)); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + __ret = __builtin_shufflevector(__ret, __ret, __lane_reverse_128_32); return __ret; } #endif diff --git a/lib/include/arm_sme.h b/lib/include/arm_sme.h index 19f0191ac5..6da5ca0b51 100644 --- a/lib/include/arm_sme.h +++ b/lib/include/arm_sme.h @@ -146,6 +146,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s32 svint32_t svread_hor_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m))) svint64_t svread_hor_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_mf8_m))) +svmfloat8_t svread_hor_za128_mf8_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m))) svint16_t svread_hor_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m))) @@ -172,6 +174,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m) svuint8_t svread_hor_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m))) svint8_t svread_hor_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_mf8_m))) +svmfloat8_t svread_hor_za8_mf8_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m))) svuint8_t svread_ver_za128_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m))) @@ -194,6 +198,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s32 svint32_t svread_ver_za128_s32_m(svint32_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m))) svint64_t svread_ver_za128_s64_m(svint64_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_mf8_m))) +svmfloat8_t svread_ver_za128_mf8_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m))) svint16_t svread_ver_za128_s16_m(svint16_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m))) @@ -220,6 +226,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m) svuint8_t svread_ver_za8_u8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m))) svint8_t svread_ver_za8_s8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_mf8_m))) +svmfloat8_t svread_ver_za8_mf8_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za128))) void svst1_hor_vnum_za128(uint64_t, uint32_t, svbool_t, void *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svst1_hor_vnum_za16))) @@ -294,6 +302,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s3 void svwrite_hor_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m))) void svwrite_hor_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_mf8_m))) +void svwrite_hor_za128_mf8_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m))) void svwrite_hor_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m))) @@ -320,6 +330,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_m void svwrite_hor_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m))) void svwrite_hor_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_mf8_m))) +void svwrite_hor_za8_mf8_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m))) void svwrite_ver_za128_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m))) @@ -342,6 +354,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s3 void svwrite_ver_za128_s32_m(uint64_t, uint32_t, svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m))) void svwrite_ver_za128_s64_m(uint64_t, uint32_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_mf8_m))) +void svwrite_ver_za128_mf8_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m))) void svwrite_ver_za128_s16_m(uint64_t, uint32_t, svbool_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m))) @@ -368,6 +382,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_m void svwrite_ver_za8_u8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m))) void svwrite_ver_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_mf8_m))) +void svwrite_ver_za8_mf8_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za))) void svzero_mask_za(uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za))) @@ -422,6 +438,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s3 svint32_t svread_hor_za128_m(svint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s64_m))) svint64_t svread_hor_za128_m(svint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_mf8_m))) +svmfloat8_t svread_hor_za128_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za128_s16_m))) svint16_t svread_hor_za128_m(svint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za16_u16_m))) @@ -448,6 +466,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_m svuint8_t svread_hor_za8_m(svuint8_t, 
svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_m))) svint8_t svread_hor_za8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_mf8_m))) +svmfloat8_t svread_hor_za8_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u8_m))) svuint8_t svread_ver_za128_m(svuint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_u32_m))) @@ -470,6 +490,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s3 svint32_t svread_ver_za128_m(svint32_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s64_m))) svint64_t svread_ver_za128_m(svint64_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_mf8_m))) +svmfloat8_t svread_ver_za128_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za128_s16_m))) svint16_t svread_ver_za128_m(svint16_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_m))) @@ -496,6 +518,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_m svuint8_t svread_ver_za8_m(svuint8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_m))) svint8_t svread_ver_za8_m(svint8_t, svbool_t, uint64_t, uint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_mf8_m))) +svmfloat8_t svread_ver_za8_m(svmfloat8_t, svbool_t, uint64_t, uint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumopa_za32_s8_m))) void svsumopa_za32_m(uint64_t, svbool_t, svbool_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsumops_za32_s8_m))) @@ -526,6 +550,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s64_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_mf8_m))) +void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za128_s16_m))) void svwrite_hor_za128_m(uint64_t, uint32_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za16_u16_m))) @@ -552,6 +578,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_ void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_m))) void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_mf8_m))) +void svwrite_hor_za8_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u8_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_u32_m))) @@ -574,6 +602,8 @@ __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s64_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_mf8_m))) +void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za128_s16_m))) void svwrite_ver_za128_m(uint64_t, uint32_t, svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_m))) @@ -600,6 +630,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_ void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m))) void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_mf8_m))) +void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2))) void svadd_za16_f16_vg1x2(uint32_t, svfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4))) @@ -1158,6 +1190,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16) svfloat16_t svluti2_lane_zt_f16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32))) svint32_t svluti2_lane_zt_s32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_mf8))) +svmfloat8_t svluti2_lane_zt_mf8(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16))) svint16_t svluti2_lane_zt_s16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x2))) @@ -1176,6 +1210,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_ svfloat16x2_t svluti2_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x2))) svint32x2_t svluti2_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_mf8_x2))) +svmfloat8x2_t svluti2_lane_zt_mf8_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x2))) svint16x2_t svluti2_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_u8_x4))) @@ -1194,6 +1230,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_f16_ svfloat16x4_t svluti2_lane_zt_f16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s32_x4))) svint32x4_t svluti2_lane_zt_s32_x4(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_mf8_x4))) +svmfloat8x4_t svluti2_lane_zt_mf8_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti2_lane_zt_s16_x4))) svint16x4_t svluti2_lane_zt_s16_x4(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8))) @@ -1212,6 +1250,8 @@ __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16) svfloat16_t svluti4_lane_zt_f16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32))) svint32_t svluti4_lane_zt_s32(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_mf8))) +svmfloat8_t svluti4_lane_zt_mf8(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16))) svint16_t svluti4_lane_zt_s16(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u8_x2))) @@ -1230,6 +1270,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_f16_ svfloat16x2_t svluti4_lane_zt_f16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s32_x2))) svint32x2_t svluti4_lane_zt_s32_x2(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_mf8_x2))) +svmfloat8x2_t svluti4_lane_zt_mf8_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_s16_x2))) svint16x2_t svluti4_lane_zt_s16_x2(uint64_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_lane_zt_u32_x4))) @@ -1514,10 +1556,14 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg svuint8x2_t svread_hor_za8_u8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg2))) svint8x2_t svread_hor_za8_s8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_mf8_vg2))) +svmfloat8x2_t svread_hor_za8_mf8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_u8_vg4))) svuint8x4_t svread_hor_za8_u8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_s8_vg4))) svint8x4_t svread_hor_za8_s8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_hor_za8_mf8_vg4))) +svmfloat8x4_t svread_hor_za8_mf8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_u16_vg2))) svuint16x2_t svread_ver_za16_u16_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za16_bf16_vg2))) @@ -1562,10 +1608,14 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg svuint8x2_t svread_ver_za8_u8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg2))) svint8x2_t svread_ver_za8_s8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_mf8_vg2))) +svmfloat8x2_t svread_ver_za8_mf8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_u8_vg4))) svuint8x4_t svread_ver_za8_u8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_s8_vg4))) svint8x4_t svread_ver_za8_s8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_ver_za8_mf8_vg4))) +svmfloat8x4_t svread_ver_za8_mf8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_u16_vg1x2))) svuint16x2_t svread_za16_u16_vg1x2(uint32_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za16_bf16_vg1x2))) @@ -1610,10 +1660,14 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x2) svuint8x2_t svread_za8_u8_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x2))) svint8x2_t svread_za8_s8_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_mf8_vg1x2))) +svmfloat8x2_t svread_za8_mf8_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_u8_vg1x4))) svuint8x4_t svread_za8_u8_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_s8_vg1x4))) svint8x4_t svread_za8_s8_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svread_za8_mf8_vg1x4))) +svmfloat8x4_t svread_za8_mf8_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svstr_zt))) void svstr_zt(uint64_t, void *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_write_single_za32_u32_vg1x2))) @@ -1760,10 +1814,14 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_v void svwrite_hor_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2))) void svwrite_hor_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_mf8_vg2))) +void svwrite_hor_za8_mf8_vg2(uint64_t, uint32_t, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4))) void svwrite_hor_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4))) void svwrite_hor_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_mf8_vg4))) +void svwrite_hor_za8_mf8_vg4(uint64_t, uint32_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2))) void svwrite_ver_za16_u16_vg2(uint64_t, uint32_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2))) @@ -1808,10 +1866,14 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_v void svwrite_ver_za8_u8_vg2(uint64_t, uint32_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2))) void svwrite_ver_za8_s8_vg2(uint64_t, uint32_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_mf8_vg2))) +void svwrite_ver_za8_mf8_vg2(uint64_t, uint32_t, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4))) void svwrite_ver_za8_u8_vg4(uint64_t, uint32_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4))) void svwrite_ver_za8_s8_vg4(uint64_t, uint32_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_mf8_vg4))) +void svwrite_ver_za8_mf8_vg4(uint64_t, uint32_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2))) void svwrite_za16_u16_vg1x2(uint32_t, svuint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2))) @@ -1856,10 +1918,14 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x2 void svwrite_za8_u8_vg1x2(uint32_t, svuint8x2_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2))) void svwrite_za8_s8_vg1x2(uint32_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_mf8_vg1x2))) +void svwrite_za8_mf8_vg1x2(uint32_t, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4))) void svwrite_za8_u8_vg1x4(uint32_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4))) void svwrite_za8_s8_vg1x4(uint32_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_mf8_vg1x4))) +void svwrite_za8_mf8_vg1x4(uint32_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_zt))) void svzero_zt(uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2))) @@ -2338,10 +2404,14 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_ void svwrite_hor_za8_vg2(uint64_t, uint32_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg2))) void svwrite_hor_za8_vg2(uint64_t, uint32_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_mf8_vg2))) +void svwrite_hor_za8_vg2(uint64_t, uint32_t, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_u8_vg4))) void svwrite_hor_za8_vg4(uint64_t, uint32_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_s8_vg4))) void svwrite_hor_za8_vg4(uint64_t, uint32_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_hor_za8_mf8_vg4))) +void svwrite_hor_za8_vg4(uint64_t, uint32_t, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_u16_vg2))) void svwrite_ver_za16_vg2(uint64_t, uint32_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za16_bf16_vg2))) @@ -2386,10 +2456,14 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_ void svwrite_ver_za8_vg2(uint64_t, uint32_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg2))) void svwrite_ver_za8_vg2(uint64_t, uint32_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_mf8_vg2))) +void svwrite_ver_za8_vg2(uint64_t, uint32_t, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_vg4))) void svwrite_ver_za8_vg4(uint64_t, uint32_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_vg4))) void svwrite_ver_za8_vg4(uint64_t, uint32_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_mf8_vg4))) +void svwrite_ver_za8_vg4(uint64_t, uint32_t, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_u16_vg1x2))) void svwrite_za16_vg1x2(uint32_t, svuint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za16_bf16_vg1x2))) @@ -2434,10 +2508,14 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x void svwrite_za8_vg1x2(uint32_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x2))) void svwrite_za8_vg1x2(uint32_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_mf8_vg1x2))) +void svwrite_za8_vg1x2(uint32_t, 
svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x4))) void svwrite_za8_vg1x4(uint32_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4))) void svwrite_za8_vg1x4(uint32_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_mf8_vg1x4))) +void svwrite_za8_vg1x4(uint32_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2))) void svadd_za64_f64_vg1x2(uint32_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4))) @@ -2782,6 +2860,602 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s1 void svvdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4))) void svvdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_s8_u8))) +void svmop4a_1x1_za32_s8_u8(uint64_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_u8_s8))) +void svmop4a_1x1_za32_u8_s8(uint64_t, svuint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_bf16_bf16))) +void svmop4a_1x1_za32_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_f16_f16))) +void svmop4a_1x1_za32_f16_f16(uint64_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_f32_f32))) +void svmop4a_1x1_za32_f32_f32(uint64_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_s8_s8))) +void svmop4a_1x1_za32_s8_s8(uint64_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_s16_s16))) +void svmop4a_1x1_za32_s16_s16(uint64_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_u8_u8))) +void svmop4a_1x1_za32_u8_u8(uint64_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_u16_u16))) +void svmop4a_1x1_za32_u16_u16(uint64_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_s8_u8))) +void svmop4a_1x2_za32_s8_u8(uint64_t, svint8_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_u8_s8))) +void svmop4a_1x2_za32_u8_s8(uint64_t, svuint8_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_bf16_bf16))) +void svmop4a_1x2_za32_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_f16_f16))) +void svmop4a_1x2_za32_f16_f16(uint64_t, svfloat16_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_f32_f32))) +void svmop4a_1x2_za32_f32_f32(uint64_t, svfloat32_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_s8_s8))) +void svmop4a_1x2_za32_s8_s8(uint64_t, svint8_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_s16_s16))) +void svmop4a_1x2_za32_s16_s16(uint64_t, svint16_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_u8_u8))) +void 
svmop4a_1x2_za32_u8_u8(uint64_t, svuint8_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_u16_u16))) +void svmop4a_1x2_za32_u16_u16(uint64_t, svuint16_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_s8_u8))) +void svmop4a_2x1_za32_s8_u8(uint64_t, svint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_u8_s8))) +void svmop4a_2x1_za32_u8_s8(uint64_t, svuint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_bf16_bf16))) +void svmop4a_2x1_za32_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_f16_f16))) +void svmop4a_2x1_za32_f16_f16(uint64_t, svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_f32_f32))) +void svmop4a_2x1_za32_f32_f32(uint64_t, svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_s8_s8))) +void svmop4a_2x1_za32_s8_s8(uint64_t, svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_s16_s16))) +void svmop4a_2x1_za32_s16_s16(uint64_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_u8_u8))) +void svmop4a_2x1_za32_u8_u8(uint64_t, svuint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_u16_u16))) +void svmop4a_2x1_za32_u16_u16(uint64_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_s8_u8))) +void svmop4a_2x2_za32_s8_u8(uint64_t, svint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_u8_s8))) +void svmop4a_2x2_za32_u8_s8(uint64_t, svuint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_bf16_bf16))) +void svmop4a_2x2_za32_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_f16_f16))) +void svmop4a_2x2_za32_f16_f16(uint64_t, svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_f32_f32))) +void svmop4a_2x2_za32_f32_f32(uint64_t, svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_s8_s8))) +void svmop4a_2x2_za32_s8_s8(uint64_t, svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_s16_s16))) +void svmop4a_2x2_za32_s16_s16(uint64_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_u8_u8))) +void svmop4a_2x2_za32_u8_u8(uint64_t, svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_u16_u16))) +void svmop4a_2x2_za32_u16_u16(uint64_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_s8_u8))) +void svmop4s_1x1_za32_s8_u8(uint64_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_u8_s8))) +void svmop4s_1x1_za32_u8_s8(uint64_t, svuint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_bf16_bf16))) +void svmop4s_1x1_za32_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_f16_f16))) +void svmop4s_1x1_za32_f16_f16(uint64_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_f32_f32))) +void svmop4s_1x1_za32_f32_f32(uint64_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_s8_s8))) +void svmop4s_1x1_za32_s8_s8(uint64_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_s16_s16))) +void svmop4s_1x1_za32_s16_s16(uint64_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_u8_u8))) +void svmop4s_1x1_za32_u8_u8(uint64_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_u16_u16))) +void svmop4s_1x1_za32_u16_u16(uint64_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_s8_u8))) +void svmop4s_1x2_za32_s8_u8(uint64_t, svint8_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_u8_s8))) +void svmop4s_1x2_za32_u8_s8(uint64_t, svuint8_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_bf16_bf16))) +void svmop4s_1x2_za32_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_f16_f16))) +void svmop4s_1x2_za32_f16_f16(uint64_t, svfloat16_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_f32_f32))) +void svmop4s_1x2_za32_f32_f32(uint64_t, svfloat32_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_s8_s8))) +void svmop4s_1x2_za32_s8_s8(uint64_t, svint8_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_s16_s16))) +void svmop4s_1x2_za32_s16_s16(uint64_t, svint16_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_u8_u8))) +void svmop4s_1x2_za32_u8_u8(uint64_t, svuint8_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_u16_u16))) +void svmop4s_1x2_za32_u16_u16(uint64_t, svuint16_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_s8_u8))) +void svmop4s_2x1_za32_s8_u8(uint64_t, svint8x2_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_u8_s8))) +void svmop4s_2x1_za32_u8_s8(uint64_t, svuint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_bf16_bf16))) +void svmop4s_2x1_za32_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_f16_f16))) +void svmop4s_2x1_za32_f16_f16(uint64_t, svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_f32_f32))) +void svmop4s_2x1_za32_f32_f32(uint64_t, svfloat32x2_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_s8_s8))) +void svmop4s_2x1_za32_s8_s8(uint64_t, svint8x2_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_s16_s16))) +void svmop4s_2x1_za32_s16_s16(uint64_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_u8_u8))) +void svmop4s_2x1_za32_u8_u8(uint64_t, svuint8x2_t, svuint8_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_u16_u16))) +void svmop4s_2x1_za32_u16_u16(uint64_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_s8_u8))) +void svmop4s_2x2_za32_s8_u8(uint64_t, svint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_u8_s8))) +void svmop4s_2x2_za32_u8_s8(uint64_t, svuint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_bf16_bf16))) +void svmop4s_2x2_za32_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_f16_f16))) +void svmop4s_2x2_za32_f16_f16(uint64_t, svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_f32_f32))) +void svmop4s_2x2_za32_f32_f32(uint64_t, svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_s8_s8))) +void svmop4s_2x2_za32_s8_s8(uint64_t, svint8x2_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_s16_s16))) +void svmop4s_2x2_za32_s16_s16(uint64_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_u8_u8))) +void svmop4s_2x2_za32_u8_u8(uint64_t, svuint8x2_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_u16_u16))) +void svmop4s_2x2_za32_u16_u16(uint64_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_s8_u8))) +void svmop4a_za32(uint64_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_u8_s8))) +void svmop4a_za32(uint64_t, svuint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_bf16_bf16))) +void svmop4a_za32(uint64_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_f16_f16))) +void svmop4a_za32(uint64_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_f32_f32))) +void svmop4a_za32(uint64_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_s8_s8))) +void svmop4a_za32(uint64_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_s16_s16))) +void svmop4a_za32(uint64_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_u8_u8))) +void svmop4a_za32(uint64_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_u16_u16))) +void svmop4a_za32(uint64_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_s8_u8))) +void svmop4a_za32(uint64_t, svint8_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_u8_s8))) +void svmop4a_za32(uint64_t, svuint8_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_bf16_bf16))) +void svmop4a_za32(uint64_t, svbfloat16_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_f16_f16))) +void svmop4a_za32(uint64_t, svfloat16_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_f32_f32))) +void svmop4a_za32(uint64_t, 
svfloat32_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_s8_s8))) +void svmop4a_za32(uint64_t, svint8_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_s16_s16))) +void svmop4a_za32(uint64_t, svint16_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_u8_u8))) +void svmop4a_za32(uint64_t, svuint8_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_u16_u16))) +void svmop4a_za32(uint64_t, svuint16_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_s8_u8))) +void svmop4a_za32(uint64_t, svint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_u8_s8))) +void svmop4a_za32(uint64_t, svuint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_bf16_bf16))) +void svmop4a_za32(uint64_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_f16_f16))) +void svmop4a_za32(uint64_t, svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_f32_f32))) +void svmop4a_za32(uint64_t, svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_s8_s8))) +void svmop4a_za32(uint64_t, svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_s16_s16))) +void svmop4a_za32(uint64_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_u8_u8))) +void svmop4a_za32(uint64_t, svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_u16_u16))) +void svmop4a_za32(uint64_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_s8_u8))) +void svmop4a_za32(uint64_t, svint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_u8_s8))) +void svmop4a_za32(uint64_t, svuint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_bf16_bf16))) +void svmop4a_za32(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_f16_f16))) +void svmop4a_za32(uint64_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_f32_f32))) +void svmop4a_za32(uint64_t, svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_s8_s8))) +void svmop4a_za32(uint64_t, svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_s16_s16))) +void svmop4a_za32(uint64_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_u8_u8))) +void svmop4a_za32(uint64_t, svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_u16_u16))) +void svmop4a_za32(uint64_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_s8_u8))) +void svmop4s_za32(uint64_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_u8_s8))) +void svmop4s_za32(uint64_t, svuint8_t, svint8_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_bf16_bf16))) +void svmop4s_za32(uint64_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_f16_f16))) +void svmop4s_za32(uint64_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_f32_f32))) +void svmop4s_za32(uint64_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_s8_s8))) +void svmop4s_za32(uint64_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_s16_s16))) +void svmop4s_za32(uint64_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_u8_u8))) +void svmop4s_za32(uint64_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za32_u16_u16))) +void svmop4s_za32(uint64_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_s8_u8))) +void svmop4s_za32(uint64_t, svint8_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_u8_s8))) +void svmop4s_za32(uint64_t, svuint8_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_bf16_bf16))) +void svmop4s_za32(uint64_t, svbfloat16_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_f16_f16))) +void svmop4s_za32(uint64_t, svfloat16_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_f32_f32))) +void svmop4s_za32(uint64_t, svfloat32_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_s8_s8))) +void svmop4s_za32(uint64_t, svint8_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_s16_s16))) +void svmop4s_za32(uint64_t, svint16_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_u8_u8))) +void svmop4s_za32(uint64_t, svuint8_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za32_u16_u16))) +void svmop4s_za32(uint64_t, svuint16_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_s8_u8))) +void svmop4s_za32(uint64_t, svint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_u8_s8))) +void svmop4s_za32(uint64_t, svuint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_bf16_bf16))) +void svmop4s_za32(uint64_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_f16_f16))) +void svmop4s_za32(uint64_t, svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_f32_f32))) +void svmop4s_za32(uint64_t, svfloat32x2_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_s8_s8))) +void svmop4s_za32(uint64_t, svint8x2_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_s16_s16))) +void svmop4s_za32(uint64_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_u8_u8))) +void svmop4s_za32(uint64_t, svuint8x2_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za32_u16_u16))) +void 
svmop4s_za32(uint64_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_s8_u8))) +void svmop4s_za32(uint64_t, svint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_u8_s8))) +void svmop4s_za32(uint64_t, svuint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_bf16_bf16))) +void svmop4s_za32(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_f16_f16))) +void svmop4s_za32(uint64_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_f32_f32))) +void svmop4s_za32(uint64_t, svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_s8_s8))) +void svmop4s_za32(uint64_t, svint8x2_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_s16_s16))) +void svmop4s_za32(uint64_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_u8_u8))) +void svmop4s_za32(uint64_t, svuint8x2_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za32_u16_u16))) +void svmop4s_za32(uint64_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za16_bf16_bf16))) +void svmop4a_1x1_za16_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za16_bf16_bf16))) +void svmop4a_1x2_za16_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za16_bf16_bf16))) +void svmop4a_2x1_za16_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za16_bf16_bf16))) +void svmop4a_2x2_za16_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za16_bf16_bf16))) +void svmop4s_1x1_za16_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za16_bf16_bf16))) +void svmop4s_1x2_za16_bf16_bf16(uint64_t, svbfloat16_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za16_bf16_bf16))) +void svmop4s_2x1_za16_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za16_bf16_bf16))) +void svmop4s_2x2_za16_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za16_bf16_bf16))) +void svmop4a_za16(uint64_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za16_bf16_bf16))) +void svmop4a_za16(uint64_t, svbfloat16_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za16_bf16_bf16))) +void svmop4a_za16(uint64_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za16_bf16_bf16))) +void svmop4a_za16(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za16_bf16_bf16))) +void svmop4s_za16(uint64_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za16_bf16_bf16))) +void 
svmop4s_za16(uint64_t, svbfloat16_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za16_bf16_bf16))) +void svmop4s_za16(uint64_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za16_bf16_bf16))) +void svmop4s_za16(uint64_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za16_f16_f16))) +void svmop4a_1x1_za16_f16_f16(uint64_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za16_f16_f16))) +void svmop4a_1x2_za16_f16_f16(uint64_t, svfloat16_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za16_f16_f16))) +void svmop4a_2x1_za16_f16_f16(uint64_t, svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za16_f16_f16))) +void svmop4a_2x2_za16_f16_f16(uint64_t, svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za16_f16_f16))) +void svmop4s_1x1_za16_f16_f16(uint64_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za16_f16_f16))) +void svmop4s_1x2_za16_f16_f16(uint64_t, svfloat16_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za16_f16_f16))) +void svmop4s_2x1_za16_f16_f16(uint64_t, svfloat16x2_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za16_f16_f16))) +void svmop4s_2x2_za16_f16_f16(uint64_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za16_f16_f16))) +void svmop4a_za16(uint64_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za16_f16_f16))) +void svmop4a_za16(uint64_t, svfloat16_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za16_f16_f16))) +void svmop4a_za16(uint64_t, svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za16_f16_f16))) +void svmop4a_za16(uint64_t, svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za16_f16_f16))) +void svmop4s_za16(uint64_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za16_f16_f16))) +void svmop4s_za16(uint64_t, svfloat16_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za16_f16_f16))) +void svmop4s_za16(uint64_t, svfloat16x2_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za16_f16_f16))) +void svmop4s_za16(uint64_t, svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_f64_f64))) +void svmop4a_1x1_za64_f64_f64(uint64_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_f64_f64))) +void svmop4a_1x2_za64_f64_f64(uint64_t, svfloat64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_f64_f64))) +void svmop4a_2x1_za64_f64_f64(uint64_t, svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_f64_f64))) +void svmop4a_2x2_za64_f64_f64(uint64_t, svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_f64_f64))) +void 
svmop4s_1x1_za64_f64_f64(uint64_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_f64_f64))) +void svmop4s_1x2_za64_f64_f64(uint64_t, svfloat64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_f64_f64))) +void svmop4s_2x1_za64_f64_f64(uint64_t, svfloat64x2_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_f64_f64))) +void svmop4s_2x2_za64_f64_f64(uint64_t, svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_f64_f64))) +void svmop4a_za64(uint64_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_f64_f64))) +void svmop4a_za64(uint64_t, svfloat64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_f64_f64))) +void svmop4a_za64(uint64_t, svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_f64_f64))) +void svmop4a_za64(uint64_t, svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_f64_f64))) +void svmop4s_za64(uint64_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_f64_f64))) +void svmop4s_za64(uint64_t, svfloat64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_f64_f64))) +void svmop4s_za64(uint64_t, svfloat64x2_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_f64_f64))) +void svmop4s_za64(uint64_t, svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za16_mf8_mf8_fpm))) +void svmop4a_1x1_za16_mf8_mf8_fpm(uint64_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za16_mf8_mf8_fpm))) +void svmop4a_1x2_za16_mf8_mf8_fpm(uint64_t, svmfloat8_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za16_mf8_mf8_fpm))) +void svmop4a_2x1_za16_mf8_mf8_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za16_mf8_mf8_fpm))) +void svmop4a_2x2_za16_mf8_mf8_fpm(uint64_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za16_mf8_mf8_fpm))) +void svmop4a_za16_fpm(uint64_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za16_mf8_mf8_fpm))) +void svmop4a_za16_fpm(uint64_t, svmfloat8_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za16_mf8_mf8_fpm))) +void svmop4a_za16_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za16_mf8_mf8_fpm))) +void svmop4a_za16_fpm(uint64_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_mf8_mf8_fpm))) +void svmop4a_1x1_za32_mf8_mf8_fpm(uint64_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_mf8_mf8_fpm))) +void svmop4a_1x2_za32_mf8_mf8_fpm(uint64_t, svmfloat8_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_mf8_mf8_fpm))) +void 
svmop4a_2x1_za32_mf8_mf8_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_mf8_mf8_fpm))) +void svmop4a_2x2_za32_mf8_mf8_fpm(uint64_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za32_mf8_mf8_fpm))) +void svmop4a_za32_fpm(uint64_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za32_mf8_mf8_fpm))) +void svmop4a_za32_fpm(uint64_t, svmfloat8_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za32_mf8_mf8_fpm))) +void svmop4a_za32_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za32_mf8_mf8_fpm))) +void svmop4a_za32_fpm(uint64_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_s16_u16))) +void svmop4a_1x1_za64_s16_u16(uint64_t, svint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_u16_s16))) +void svmop4a_1x1_za64_u16_s16(uint64_t, svuint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_s16_s16))) +void svmop4a_1x1_za64_s16_s16(uint64_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_u16_u16))) +void svmop4a_1x1_za64_u16_u16(uint64_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_s16_u16))) +void svmop4a_1x2_za64_s16_u16(uint64_t, svint16_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_u16_s16))) +void svmop4a_1x2_za64_u16_s16(uint64_t, svuint16_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_s16_s16))) +void svmop4a_1x2_za64_s16_s16(uint64_t, svint16_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_u16_u16))) +void svmop4a_1x2_za64_u16_u16(uint64_t, svuint16_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_s16_u16))) +void svmop4a_2x1_za64_s16_u16(uint64_t, svint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_u16_s16))) +void svmop4a_2x1_za64_u16_s16(uint64_t, svuint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_s16_s16))) +void svmop4a_2x1_za64_s16_s16(uint64_t, svint16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_u16_u16))) +void svmop4a_2x1_za64_u16_u16(uint64_t, svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_s16_u16))) +void svmop4a_2x2_za64_s16_u16(uint64_t, svint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_u16_s16))) +void svmop4a_2x2_za64_u16_s16(uint64_t, svuint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_s16_s16))) +void svmop4a_2x2_za64_s16_s16(uint64_t, svint16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_u16_u16))) +void svmop4a_2x2_za64_u16_u16(uint64_t, svuint16x2_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_s16_u16))) +void svmop4s_1x1_za64_s16_u16(uint64_t, svint16_t, svuint16_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_u16_s16)))
+void svmop4s_1x1_za64_u16_s16(uint64_t, svuint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_s16_s16)))
+void svmop4s_1x1_za64_s16_s16(uint64_t, svint16_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_u16_u16)))
+void svmop4s_1x1_za64_u16_u16(uint64_t, svuint16_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_s16_u16)))
+void svmop4s_1x2_za64_s16_u16(uint64_t, svint16_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_u16_s16)))
+void svmop4s_1x2_za64_u16_s16(uint64_t, svuint16_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_s16_s16)))
+void svmop4s_1x2_za64_s16_s16(uint64_t, svint16_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_u16_u16)))
+void svmop4s_1x2_za64_u16_u16(uint64_t, svuint16_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_s16_u16)))
+void svmop4s_2x1_za64_s16_u16(uint64_t, svint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_u16_s16)))
+void svmop4s_2x1_za64_u16_s16(uint64_t, svuint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_s16_s16)))
+void svmop4s_2x1_za64_s16_s16(uint64_t, svint16x2_t, svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_u16_u16)))
+void svmop4s_2x1_za64_u16_u16(uint64_t, svuint16x2_t, svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_s16_u16)))
+void svmop4s_2x2_za64_s16_u16(uint64_t, svint16x2_t, svuint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_u16_s16)))
+void svmop4s_2x2_za64_u16_s16(uint64_t, svuint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_s16_s16)))
+void svmop4s_2x2_za64_s16_s16(uint64_t, svint16x2_t, svint16x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_u16_u16)))
+void svmop4s_2x2_za64_u16_u16(uint64_t, svuint16x2_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_s16_u16)))
+void svmop4a_za64(uint64_t, svint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_u16_s16)))
+void svmop4a_za64(uint64_t, svuint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_s16_s16)))
+void svmop4a_za64(uint64_t, svint16_t, svint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x1_za64_u16_u16)))
+void svmop4a_za64(uint64_t, svuint16_t, svuint16_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_s16_u16)))
+void svmop4a_za64(uint64_t, svint16_t, svuint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_u16_s16)))
+void svmop4a_za64(uint64_t, svuint16_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_s16_s16)))
+void svmop4a_za64(uint64_t, svint16_t, svint16x2_t);
+__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_1x2_za64_u16_u16)))
+void svmop4a_za64(uint64_t, svuint16_t, svuint16x2_t);
+__aio
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_s16_u16))) +void svmop4a_za64(uint64_t, svint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_u16_s16))) +void svmop4a_za64(uint64_t, svuint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_s16_s16))) +void svmop4a_za64(uint64_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x1_za64_u16_u16))) +void svmop4a_za64(uint64_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_s16_u16))) +void svmop4a_za64(uint64_t, svint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_u16_s16))) +void svmop4a_za64(uint64_t, svuint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_s16_s16))) +void svmop4a_za64(uint64_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4a_2x2_za64_u16_u16))) +void svmop4a_za64(uint64_t, svuint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_s16_u16))) +void svmop4s_za64(uint64_t, svint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_u16_s16))) +void svmop4s_za64(uint64_t, svuint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_s16_s16))) +void svmop4s_za64(uint64_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x1_za64_u16_u16))) +void svmop4s_za64(uint64_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_s16_u16))) +void svmop4s_za64(uint64_t, svint16_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_u16_s16))) +void svmop4s_za64(uint64_t, svuint16_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_s16_s16))) +void svmop4s_za64(uint64_t, svint16_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_1x2_za64_u16_u16))) +void svmop4s_za64(uint64_t, svuint16_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_s16_u16))) +void svmop4s_za64(uint64_t, svint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_u16_s16))) +void svmop4s_za64(uint64_t, svuint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_s16_s16))) +void svmop4s_za64(uint64_t, svint16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x1_za64_u16_u16))) +void svmop4s_za64(uint64_t, svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_s16_u16))) +void svmop4s_za64(uint64_t, svint16x2_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_u16_s16))) +void svmop4s_za64(uint64_t, svuint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_s16_s16))) +void svmop4s_za64(uint64_t, svint16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmop4s_2x2_za64_u16_u16))) +void svmop4s_za64(uint64_t, svuint16x2_t, svuint16x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_s8_u8))) +void svtmopa_lane_za32_s8_u8(uint64_t, svint8x2_t, svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_u8_s8))) +void svtmopa_lane_za32_u8_s8(uint64_t, svuint8x2_t, svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_s8_s8))) +void svtmopa_lane_za32_s8_s8(uint64_t, svint8x2_t, svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_s16_s16))) +void svtmopa_lane_za32_s16_s16(uint64_t, svint16x2_t, svint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_bf16_bf16))) +void svtmopa_lane_za32_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_f32_f32))) +void svtmopa_lane_za32_f32_f32(uint64_t, svfloat32x2_t, svfloat32_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_f16_f16))) +void svtmopa_lane_za32_f16_f16(uint64_t, svfloat16x2_t, svfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_u8_u8))) +void svtmopa_lane_za32_u8_u8(uint64_t, svuint8x2_t, svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_u16_u16))) +void svtmopa_lane_za32_u16_u16(uint64_t, svuint16x2_t, svuint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_s8_u8))) +void svtmopa_lane_za32(uint64_t, svint8x2_t, svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_u8_s8))) +void svtmopa_lane_za32(uint64_t, svuint8x2_t, svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_s8_s8))) +void svtmopa_lane_za32(uint64_t, svint8x2_t, svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_s16_s16))) +void svtmopa_lane_za32(uint64_t, svint16x2_t, svint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_bf16_bf16))) +void svtmopa_lane_za32(uint64_t, svbfloat16x2_t, svbfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_f32_f32))) +void svtmopa_lane_za32(uint64_t, svfloat32x2_t, svfloat32_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_f16_f16))) +void svtmopa_lane_za32(uint64_t, svfloat16x2_t, svfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_u8_u8))) +void svtmopa_lane_za32(uint64_t, svuint8x2_t, svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_u16_u16))) +void svtmopa_lane_za32(uint64_t, svuint16x2_t, svuint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za16_bf16_bf16))) +void svtmopa_lane_za16_bf16_bf16(uint64_t, svbfloat16x2_t, svbfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za16_bf16_bf16))) +void svtmopa_lane_za16(uint64_t, svbfloat16x2_t, svbfloat16_t, svuint8_t, uint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za16_f16_f16))) +void svtmopa_lane_za16_f16_f16(uint64_t, svfloat16x2_t, svfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za16_f16_f16))) +void svtmopa_lane_za16(uint64_t, svfloat16x2_t, svfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za16_mf8_mf8_fpm))) +void svtmopa_lane_za16_mf8_mf8_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, svuint8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za16_mf8_mf8_fpm))) +void svtmopa_lane_za16_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, svuint8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_mf8_mf8_fpm))) +void svtmopa_lane_za32_mf8_mf8_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, svuint8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svtmopa_lane_za32_mf8_mf8_fpm))) +void svtmopa_lane_za32_fpm(uint64_t, svmfloat8x2_t, svmfloat8_t, svuint8_t, uint64_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u8))) svuint8_t svreadz_hor_za128_u8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u32))) @@ -2804,6 +3478,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s3 svint32_t svreadz_hor_za128_s32(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s64))) svint64_t svreadz_hor_za128_s64(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_mf8))) +svmfloat8_t svreadz_hor_za128_mf8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s16))) svint16_t svreadz_hor_za128_s16(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_u16))) @@ -2870,14 +3546,20 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8)) svuint8_t svreadz_hor_za8_u8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8))) svint8_t svreadz_hor_za8_s8(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_mf8))) +svmfloat8_t svreadz_hor_za8_mf8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8_vg2))) svuint8x2_t svreadz_hor_za8_u8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8_vg2))) svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_mf8_vg2))) +svmfloat8x2_t svreadz_hor_za8_mf8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8_vg4))) svuint8x4_t svreadz_hor_za8_u8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8_vg4))) svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_mf8_vg4))) +svmfloat8x4_t svreadz_hor_za8_mf8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u8))) svuint8_t svreadz_ver_za128_u8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u32))) @@ -2900,6 +3582,8 @@ __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s3 svint32_t svreadz_ver_za128_s32(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s64))) svint64_t svreadz_ver_za128_s64(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_mf8))) +svmfloat8_t svreadz_ver_za128_mf8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s16))) svint16_t svreadz_ver_za128_s16(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_u16))) @@ -2966,14 +3650,20 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8)) svuint8_t svreadz_ver_za8_u8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8))) svint8_t svreadz_ver_za8_s8(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_mf8))) +svmfloat8_t svreadz_ver_za8_mf8(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8_vg2))) svuint8x2_t svreadz_ver_za8_u8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8_vg2))) svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_mf8_vg2))) +svmfloat8x2_t svreadz_ver_za8_mf8_vg2(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8_vg4))) svuint8x4_t svreadz_ver_za8_u8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8_vg4))) svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_mf8_vg4))) +svmfloat8x4_t svreadz_ver_za8_mf8_vg4(uint64_t, uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_u16_vg1x2))) svuint16x2_t svreadz_za16_u16_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_bf16_vg1x2))) @@ -3018,10 +3708,14 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_u8_vg1x2 svuint8x2_t svreadz_za8_u8_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_s8_vg1x2))) svint8x2_t svreadz_za8_s8_vg1x2(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_mf8_vg1x2))) +svmfloat8x2_t svreadz_za8_mf8_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_u8_vg1x4))) svuint8x4_t svreadz_za8_u8_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_s8_vg1x4))) svint8x4_t svreadz_za8_s8_vg1x4(uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_mf8_vg1x4))) +svmfloat8x4_t svreadz_za8_mf8_vg1x4(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg1x2))) void svzero_za64_vg1x2(uint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg1x4))) diff --git a/lib/include/arm_sve.h b/lib/include/arm_sve.h index 35e4644b60..6a036be08c 100644 --- a/lib/include/arm_sve.h +++ b/lib/include/arm_sve.h @@ -3359,6 +3359,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2))) svint32x2_t svsel_s32_x2(svcount_t, svint32x2_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2))) svint64x2_t 
svsel_s64_x2(svcount_t, svint64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_mf8_x2))) +svmfloat8x2_t svsel_mf8_x2(svcount_t, svmfloat8x2_t, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2))) svint16x2_t svsel_s16_x2(svcount_t, svint16x2_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4))) @@ -3383,6 +3385,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4))) svint32x4_t svsel_s32_x4(svcount_t, svint32x4_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4))) svint64x4_t svsel_s64_x4(svcount_t, svint64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_mf8_x4))) +svmfloat8x4_t svsel_mf8_x4(svcount_t, svmfloat8x4_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4))) svint16x4_t svsel_s16_x4(svcount_t, svint16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2))) @@ -3431,6 +3435,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2))) svint32x2_t svuzp_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2))) svint64x2_t svuzp_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_mf8_x2))) +svmfloat8x2_t svuzp_mf8_x2(svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2))) svint16x2_t svuzp_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4))) @@ -3455,6 +3461,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4))) svint32x4_t svuzp_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4))) svint64x4_t svuzp_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_mf8_x4))) +svmfloat8x4_t svuzp_mf8_x4(svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4))) svint16x4_t svuzp_s16_x4(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2))) @@ -3479,6 +3487,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2))) svint32x2_t svuzpq_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2))) svint64x2_t svuzpq_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_mf8_x2))) +svmfloat8x2_t svuzpq_mf8_x2(svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2))) svint16x2_t svuzpq_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4))) @@ -3503,6 +3513,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4))) svint32x4_t svuzpq_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4))) svint64x4_t svuzpq_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_mf8_x4))) +svmfloat8x4_t svuzpq_mf8_x4(svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4))) svint16x4_t svuzpq_s16_x4(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2))) @@ -3527,6 +3539,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2))) svint32x2_t svzip_s32_x2(svint32x2_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2))) svint64x2_t svzip_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_mf8_x2))) +svmfloat8x2_t svzip_mf8_x2(svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2))) svint16x2_t svzip_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4))) @@ -3551,6 +3565,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4))) svint32x4_t svzip_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4))) svint64x4_t svzip_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_mf8_x4))) +svmfloat8x4_t svzip_mf8_x4(svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4))) svint16x4_t svzip_s16_x4(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2))) @@ -3575,6 +3591,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2))) svint32x2_t svzipq_s32_x2(svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2))) svint64x2_t svzipq_s64_x2(svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_mf8_x2))) +svmfloat8x2_t svzipq_mf8_x2(svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2))) svint16x2_t svzipq_s16_x2(svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4))) @@ -3599,6 +3617,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4))) svint32x4_t svzipq_s32_x4(svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4))) svint64x4_t svzipq_s64_x4(svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_mf8_x4))) +svmfloat8x4_t svzipq_mf8_x4(svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4))) svint16x4_t svzipq_s16_x4(svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_single_u8_x2))) @@ -4119,6 +4139,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x2))) svint32x2_t svsel(svcount_t, svint32x2_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x2))) svint64x2_t svsel(svcount_t, svint64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_mf8_x2))) +svmfloat8x2_t svsel(svcount_t, svmfloat8x2_t, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x2))) svint16x2_t svsel(svcount_t, svint16x2_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u8_x4))) @@ -4143,6 +4165,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s32_x4))) svint32x4_t svsel(svcount_t, svint32x4_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s64_x4))) svint64x4_t svsel(svcount_t, svint64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_mf8_x4))) +svmfloat8x4_t svsel(svcount_t, svmfloat8x4_t, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s16_x4))) svint16x4_t svsel(svcount_t, svint16x4_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpk_s32_s16_x2))) @@ -4191,6 +4215,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x2))) svint32x2_t svuzp(svint32x2_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x2))) svint64x2_t svuzp(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_mf8_x2))) +svmfloat8x2_t svuzp(svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x2))) svint16x2_t svuzp(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_u8_x4))) @@ -4215,6 +4241,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s32_x4))) svint32x4_t svuzp(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s64_x4))) svint64x4_t svuzp(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_mf8_x4))) +svmfloat8x4_t svuzp(svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp_s16_x4))) svint16x4_t svuzp(svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x2))) @@ -4239,6 +4267,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x2))) svint32x2_t svuzpq(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x2))) svint64x2_t svuzpq(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_mf8_x2))) +svmfloat8x2_t svuzpq(svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x2))) svint16x2_t svuzpq(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_u8_x4))) @@ -4263,6 +4293,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s32_x4))) svint32x4_t svuzpq(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s64_x4))) svint64x4_t svuzpq(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_mf8_x4))) +svmfloat8x4_t svuzpq(svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq_s16_x4))) svint16x4_t svuzpq(svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x2))) @@ -4287,6 +4319,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x2))) svint32x2_t svzip(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x2))) svint64x2_t svzip(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_mf8_x2))) +svmfloat8x2_t svzip(svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x2))) svint16x2_t svzip(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_u8_x4))) @@ -4311,6 +4345,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s32_x4))) svint32x4_t svzip(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s64_x4))) svint64x4_t svzip(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_mf8_x4))) +svmfloat8x4_t svzip(svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip_s16_x4))) svint16x4_t svzip(svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x2))) @@ -4335,6 +4371,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x2))) svint32x2_t svzipq(svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x2))) svint64x2_t svzipq(svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_mf8_x2))) +svmfloat8x2_t svzipq(svmfloat8x2_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x2))) svint16x2_t svzipq(svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_u8_x4))) @@ -4359,6 +4397,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s32_x4))) svint32x4_t svzipq(svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4))) svint64x4_t svzipq(svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_mf8_x4))) +svmfloat8x4_t svzipq(svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4))) svint16x4_t svzipq(svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x2))) @@ -4627,12 +4667,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) svint32_t svcompact_s32(svbool_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) svint64_t svcompact_s64(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) -svfloat64_t svexpa_f64(svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) -svfloat32_t svexpa_f32(svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) -svfloat16_t svexpa_f16(svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_u32base_index_u32(svbool_t, svuint32_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) @@ -4957,6 +4991,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u64))) svuint64_t svldff1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u16))) svuint16_t svldff1_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) +svbfloat16_t svldff1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s8))) svint8_t svldff1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f64))) @@ -4969,6 +5005,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s32))) svint32_t svldff1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s64))) svint64_t svldff1_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_mf8))) +svmfloat8_t svldff1_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s16))) svint16_t svldff1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_u32))) @@ -5063,6 +5101,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u64))) svuint64_t svldff1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u16))) svuint16_t svldff1_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) +svbfloat16_t svldff1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s8))) svint8_t svldff1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f64))) @@ -5075,6 +5115,8 @@ __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s32))) svint32_t svldff1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s64))) svint64_t svldff1_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_mf8))) +svmfloat8_t svldff1_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s16))) svint16_t svldff1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_u32))) @@ -5413,6 +5455,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u64))) svuint64_t svldnf1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u16))) svuint16_t svldnf1_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) +svbfloat16_t svldnf1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s8))) svint8_t svldnf1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f64))) @@ -5425,6 +5469,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s32))) svint32_t svldnf1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s64))) svint64_t svldnf1_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_mf8))) +svmfloat8_t svldnf1_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s16))) svint16_t svldnf1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u8))) @@ -5435,6 +5481,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u64))) svuint64_t svldnf1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u16))) svuint16_t svldnf1_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) +svbfloat16_t svldnf1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s8))) svint8_t svldnf1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f64))) @@ -5447,6 +5495,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s32))) svint32_t svldnf1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s64))) svint64_t svldnf1_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_mf8))) +svmfloat8_t svldnf1_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s16))) svint16_t svldnf1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1sb_vnum_u32))) @@ -5885,12 +5935,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s32))) svint32_t svcompact(svbool_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcompact_s64))) svint64_t svcompact(svbool_t, svint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) -svfloat64_t svexpa(svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) -svfloat32_t svexpa(svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) -svfloat16_t svexpa(svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u32base_index_u32))) svuint32_t svld1_gather_index_u32(svbool_t, svuint32_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_gather_u64base_index_u64))) @@ -6215,6 +6259,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u64))) svuint64_t svldff1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_u16))) svuint16_t svldff1(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) +svbfloat16_t svldff1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s8))) svint8_t svldff1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_f64))) @@ -6227,6 +6273,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s32))) svint32_t svldff1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s64))) svint64_t svldff1(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_mf8))) +svmfloat8_t svldff1(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_s16))) svint16_t svldff1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_gather_u32base_index_u32))) @@ -6321,6 +6369,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u64))) svuint64_t svldff1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_u16))) svuint16_t svldff1_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) +svbfloat16_t svldff1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s8))) svint8_t svldff1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_f64))) @@ -6333,6 +6383,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s32))) svint32_t svldff1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s64))) svint64_t svldff1_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_mf8))) +svmfloat8_t svldff1_vnum(svbool_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_s16))) svint16_t svldff1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1sb_gather_u32base_offset_u32))) @@ -6575,6 +6627,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u64))) svuint64_t svldnf1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_u16))) svuint16_t svldnf1(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) +svbfloat16_t svldnf1(svbool_t, bfloat16_t const *); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s8))) svint8_t svldnf1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_f64))) @@ -6587,6 +6641,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s32))) svint32_t svldnf1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s64))) svint64_t svldnf1(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_mf8))) +svmfloat8_t svldnf1(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_s16))) svint16_t svldnf1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u8))) @@ -6597,6 +6653,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u64))) svuint64_t svldnf1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_u16))) svuint16_t svldnf1_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) +svbfloat16_t svldnf1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s8))) svint8_t svldnf1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_f64))) @@ -6609,6 +6667,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s32))) svint32_t svldnf1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s64))) svint64_t svldnf1_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_mf8))) +svmfloat8_t svldnf1_vnum(svbool_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_s16))) svint16_t svldnf1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base))) @@ -6893,50 +6953,1834 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f32))) svfloat32_t svtssel(svfloat32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f16))) svfloat16_t svtssel(svfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) +svint8_t svclamp_s8(svint8_t, svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) +svint32_t svclamp_s32(svint32_t, svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) +svint64_t svclamp_s64(svint64_t, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) +svint16_t svclamp_s16(svint16_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) +svuint8_t svclamp_u8(svuint8_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) +svuint32_t svclamp_u32(svuint32_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) +svuint64_t svclamp_u64(svuint64_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) +svuint16_t svclamp_u16(svuint16_t, svuint16_t, svuint16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b16))) +svbool_t svpsel_lane_b16(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b32))) +svbool_t svpsel_lane_b32(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b64))) +svbool_t svpsel_lane_b64(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b8))) +svbool_t svpsel_lane_b8(svbool_t, svbool_t, uint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) +svuint8_t svrevd_u8_m(svuint8_t, svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) +svuint32_t svrevd_u32_m(svuint32_t, svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) +svuint64_t svrevd_u64_m(svuint64_t, svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) +svuint16_t svrevd_u16_m(svuint16_t, svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) +svbfloat16_t svrevd_bf16_m(svbfloat16_t, svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) +svint8_t svrevd_s8_m(svint8_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) +svfloat64_t svrevd_f64_m(svfloat64_t, svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) +svfloat32_t svrevd_f32_m(svfloat32_t, svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) +svfloat16_t svrevd_f16_m(svfloat16_t, svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) +svint32_t svrevd_s32_m(svint32_t, svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) +svint64_t svrevd_s64_m(svint64_t, svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_mf8_m))) +svmfloat8_t svrevd_mf8_m(svmfloat8_t, svbool_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) +svint16_t svrevd_s16_m(svint16_t, svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) +svuint8_t svrevd_u8_x(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) +svuint32_t svrevd_u32_x(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) +svuint64_t svrevd_u64_x(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) +svuint16_t svrevd_u16_x(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) +svbfloat16_t svrevd_bf16_x(svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) +svint8_t svrevd_s8_x(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) +svfloat64_t svrevd_f64_x(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) +svfloat32_t svrevd_f32_x(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) +svfloat16_t svrevd_f16_x(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) +svint32_t 
svrevd_s32_x(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) +svint64_t svrevd_s64_x(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_mf8_x))) +svmfloat8_t svrevd_mf8_x(svbool_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) +svint16_t svrevd_s16_x(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) +svuint8_t svrevd_u8_z(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) +svuint32_t svrevd_u32_z(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) +svuint64_t svrevd_u64_z(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) +svuint16_t svrevd_u16_z(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) +svbfloat16_t svrevd_bf16_z(svbool_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) +svint8_t svrevd_s8_z(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) +svfloat64_t svrevd_f64_z(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) +svfloat32_t svrevd_f32_z(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) +svfloat16_t svrevd_f16_z(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) +svint32_t svrevd_s32_z(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) +svint64_t svrevd_s64_z(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_mf8_z))) +svmfloat8_t svrevd_mf8_z(svbool_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) +svint16_t svrevd_s16_z(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) +svint8_t svclamp(svint8_t, svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) +svint32_t svclamp(svint32_t, svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) +svint64_t svclamp(svint64_t, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) +svint16_t svclamp(svint16_t, svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) +svuint8_t svclamp(svuint8_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) +svuint32_t svclamp(svuint32_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) +svuint64_t svclamp(svuint64_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) +svuint16_t svclamp(svuint16_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) +svuint8_t svrevd_m(svuint8_t, svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) +svuint32_t svrevd_m(svuint32_t, svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) +svuint64_t svrevd_m(svuint64_t, svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) 
+svuint16_t svrevd_m(svuint16_t, svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) +svbfloat16_t svrevd_m(svbfloat16_t, svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) +svint8_t svrevd_m(svint8_t, svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) +svfloat64_t svrevd_m(svfloat64_t, svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) +svfloat32_t svrevd_m(svfloat32_t, svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) +svfloat16_t svrevd_m(svfloat16_t, svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) +svint32_t svrevd_m(svint32_t, svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) +svint64_t svrevd_m(svint64_t, svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_mf8_m))) +svmfloat8_t svrevd_m(svmfloat8_t, svbool_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) +svint16_t svrevd_m(svint16_t, svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) +svuint8_t svrevd_x(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) +svuint32_t svrevd_x(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) +svuint64_t svrevd_x(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) +svuint16_t svrevd_x(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) +svbfloat16_t svrevd_x(svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) +svint8_t svrevd_x(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) +svfloat64_t svrevd_x(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) +svfloat32_t svrevd_x(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) +svfloat16_t svrevd_x(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) +svint32_t svrevd_x(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) +svint64_t svrevd_x(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_mf8_x))) +svmfloat8_t svrevd_x(svbool_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) +svint16_t svrevd_x(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) +svuint8_t svrevd_z(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) +svuint32_t svrevd_z(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) +svuint64_t svrevd_z(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) +svuint16_t svrevd_z(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) +svbfloat16_t svrevd_z(svbool_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) +svint8_t 
svrevd_z(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) +svfloat64_t svrevd_z(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) +svfloat32_t svrevd_z(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) +svfloat16_t svrevd_z(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) +svint32_t svrevd_z(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) +svint64_t svrevd_z(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_mf8_z))) +svmfloat8_t svrevd_z(svbool_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) +svint16_t svrevd_z(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) +svfloat32_t svbfmlslb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) +svfloat32_t svbfmlslb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) +svfloat32_t svbfmlslt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) +svfloat32_t svbfmlslt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) +svfloat64_t svclamp_f64(svfloat64_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) +svfloat32_t svclamp_f32(svfloat32_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) +svfloat16_t svclamp_f16(svfloat16_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) +svboolx2_t svcreate2_b(svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) +svboolx4_t svcreate4_b(svbool_t, svbool_t, svbool_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) +svfloat32_t svdot_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) +svint32_t svdot_s32_s16(svint32_t, svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) +svuint32_t svdot_u32_u16(svuint32_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) +svfloat32_t svdot_lane_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) +svint32_t svdot_lane_s32_s16(svint32_t, svint16_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) +svuint32_t svdot_lane_u32_u16(svuint32_t, svuint16_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) +svbool_t svget2_b(svboolx2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) +svbool_t svget4_b(svboolx4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) +svint16_t svqcvtn_s16_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) 
+svuint16_t svqcvtn_u16_s32_x2(svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) +svuint16_t svqcvtn_u16_u32_x2(svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) +svint16_t svqrshrn_n_s16_s32_x2(svint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) +svuint16_t svqrshrn_n_u16_u32_x2(svuint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) +svuint16_t svqrshrun_n_u16_s32_x2(svint32x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) +svboolx2_t svset2_b(svboolx2_t, uint64_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) +svboolx4_t svset4_b(svboolx4_t, uint64_t, svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_b))) +svboolx2_t svundef2_b(); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_b))) +svboolx4_t svundef4_b(); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) +svboolx2_t svwhilege_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) +svboolx2_t svwhilege_b32_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) +svboolx2_t svwhilege_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) +svboolx2_t svwhilege_b16_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) +svboolx2_t svwhilege_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) +svboolx2_t svwhilege_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) +svboolx2_t svwhilege_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) +svboolx2_t svwhilege_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) +svboolx2_t svwhilegt_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) +svboolx2_t svwhilegt_b32_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) +svboolx2_t svwhilegt_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) +svboolx2_t svwhilegt_b16_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) +svboolx2_t svwhilegt_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) +svboolx2_t svwhilegt_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) +svboolx2_t svwhilegt_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) +svboolx2_t svwhilegt_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) +svboolx2_t svwhilele_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) +svboolx2_t svwhilele_b32_s64_x2(int64_t, int64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) +svboolx2_t svwhilele_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) +svboolx2_t svwhilele_b16_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) +svboolx2_t svwhilele_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) +svboolx2_t svwhilele_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) +svboolx2_t svwhilele_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) +svboolx2_t svwhilele_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) +svboolx2_t svwhilelt_b8_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) +svboolx2_t svwhilelt_b32_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) +svboolx2_t svwhilelt_b64_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) +svboolx2_t svwhilelt_b16_u64_x2(uint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) +svboolx2_t svwhilelt_b8_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) +svboolx2_t svwhilelt_b32_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) +svboolx2_t svwhilelt_b64_s64_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) +svboolx2_t svwhilelt_b16_s64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) +svfloat32_t svbfmlslb(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) +svfloat32_t svbfmlslb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) +svfloat32_t svbfmlslt(svfloat32_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) +svfloat32_t svbfmlslt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) +svfloat64_t svclamp(svfloat64_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) +svfloat32_t svclamp(svfloat32_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) +svfloat16_t svclamp(svfloat16_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) +svboolx2_t svcreate2(svbool_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) +svboolx4_t svcreate4(svbool_t, svbool_t, svbool_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) +svfloat32_t svdot(svfloat32_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) +svint32_t svdot(svint32_t, svint16_t, svint16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) +svuint32_t svdot(svuint32_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) +svfloat32_t svdot_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) +svint32_t svdot_lane(svint32_t, svint16_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) +svuint32_t svdot_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) +svbool_t svget2(svboolx2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) +svbool_t svget4(svboolx4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) +svint16_t svqcvtn_s16(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) +svuint16_t svqcvtn_u16(svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) +svuint16_t svqcvtn_u16(svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) +svint16_t svqrshrn_s16(svint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) +svuint16_t svqrshrn_u16(svuint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) +svuint16_t svqrshrun_u16(svint32x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) +svboolx2_t svset2(svboolx2_t, uint64_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) +svboolx4_t svset4(svboolx4_t, uint64_t, svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) +svboolx2_t svwhilege_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) +svboolx2_t svwhilege_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) +svboolx2_t svwhilege_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) +svboolx2_t svwhilege_b16_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) +svboolx2_t svwhilege_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) +svboolx2_t svwhilege_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) +svboolx2_t svwhilege_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) +svboolx2_t svwhilege_b16_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) +svboolx2_t svwhilegt_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) +svboolx2_t svwhilegt_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) +svboolx2_t svwhilegt_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) +svboolx2_t svwhilegt_b16_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) +svboolx2_t 
svwhilegt_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) +svboolx2_t svwhilegt_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) +svboolx2_t svwhilegt_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) +svboolx2_t svwhilegt_b16_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) +svboolx2_t svwhilele_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) +svboolx2_t svwhilele_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) +svboolx2_t svwhilele_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) +svboolx2_t svwhilele_b16_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) +svboolx2_t svwhilele_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) +svboolx2_t svwhilele_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) +svboolx2_t svwhilele_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) +svboolx2_t svwhilele_b16_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) +svboolx2_t svwhilelt_b8_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) +svboolx2_t svwhilelt_b32_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) +svboolx2_t svwhilelt_b64_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) +svboolx2_t svwhilelt_b16_x2(uint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) +svboolx2_t svwhilelt_b8_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) +svboolx2_t svwhilelt_b32_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) +svboolx2_t svwhilelt_b64_x2(int64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) +svboolx2_t svwhilelt_b16_x2(int64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) +uint8x16_t svaddqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) +uint32x4_t svaddqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) +uint64x2_t svaddqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) +uint16x8_t svaddqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) +int8x16_t svaddqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) +int32x4_t svaddqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) +int64x2_t svaddqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) +int16x8_t svaddqv_s16(svbool_t, 
svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) +float64x2_t svaddqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) +float32x4_t svaddqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) +float16x8_t svaddqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) +uint8x16_t svandqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) +uint32x4_t svandqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) +uint64x2_t svandqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) +uint16x8_t svandqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) +int8x16_t svandqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) +int32x4_t svandqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) +int64x2_t svandqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) +int16x8_t svandqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) +svuint8_t svdup_laneq_u8(svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) +svint8_t svdup_laneq_s8(svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_mf8))) +svmfloat8_t svdup_laneq_mf8(svmfloat8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) +svuint64_t svdup_laneq_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) +svfloat64_t svdup_laneq_f64(svfloat64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s64))) +svint64_t svdup_laneq_s64(svint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u16))) +svuint16_t svdup_laneq_u16(svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_bf16))) +svbfloat16_t svdup_laneq_bf16(svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f16))) +svfloat16_t svdup_laneq_f16(svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s16))) +svint16_t svdup_laneq_s16(svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u32))) +svuint32_t svdup_laneq_u32(svuint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f32))) +svfloat32_t svdup_laneq_f32(svfloat32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s32))) +svint32_t svdup_laneq_s32(svint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) +uint8x16_t sveorqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) +uint32x4_t sveorqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) +uint64x2_t sveorqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) +uint16x8_t sveorqv_u16(svbool_t, svuint16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) +int8x16_t sveorqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) +int32x4_t sveorqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) +int64x2_t sveorqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) +int16x8_t sveorqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) +svuint8_t svextq_u8(svuint8_t, svuint8_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) +svuint32_t svextq_u32(svuint32_t, svuint32_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) +svuint64_t svextq_u64(svuint64_t, svuint64_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) +svuint16_t svextq_u16(svuint16_t, svuint16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) +svbfloat16_t svextq_bf16(svbfloat16_t, svbfloat16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) +svint8_t svextq_s8(svint8_t, svint8_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) +svfloat64_t svextq_f64(svfloat64_t, svfloat64_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) +svfloat32_t svextq_f32(svfloat32_t, svfloat32_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) +svfloat16_t svextq_f16(svfloat16_t, svfloat16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) +svint32_t svextq_s32(svint32_t, svint32_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) +svint64_t svextq_s64(svint64_t, svint64_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_mf8))) +svmfloat8_t svextq_mf8(svmfloat8_t, svmfloat8_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) +svint16_t svextq_s16(svint16_t, svint16_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) +svuint8x2_t svld2q_u8(svbool_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) +svuint32x2_t svld2q_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) +svuint64x2_t svld2q_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) +svuint16x2_t svld2q_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) +svbfloat16x2_t svld2q_bf16(svbool_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) +svint8x2_t svld2q_s8(svbool_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) +svfloat64x2_t svld2q_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) +svfloat32x2_t svld2q_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) +svfloat16x2_t svld2q_f16(svbool_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) +svint32x2_t svld2q_s32(svbool_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) 
+svint64x2_t svld2q_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_mf8))) +svmfloat8x2_t svld2q_mf8(svbool_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) +svint16x2_t svld2q_s16(svbool_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) +svuint8x2_t svld2q_vnum_u8(svbool_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) +svuint32x2_t svld2q_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) +svuint64x2_t svld2q_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) +svuint16x2_t svld2q_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) +svbfloat16x2_t svld2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) +svint8x2_t svld2q_vnum_s8(svbool_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) +svfloat64x2_t svld2q_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) +svfloat32x2_t svld2q_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) +svfloat16x2_t svld2q_vnum_f16(svbool_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) +svint32x2_t svld2q_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) +svint64x2_t svld2q_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_mf8))) +svmfloat8x2_t svld2q_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) +svint16x2_t svld2q_vnum_s16(svbool_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) +svuint8x3_t svld3q_u8(svbool_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) +svuint32x3_t svld3q_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) +svuint64x3_t svld3q_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) +svuint16x3_t svld3q_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) +svbfloat16x3_t svld3q_bf16(svbool_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) +svint8x3_t svld3q_s8(svbool_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) +svfloat64x3_t svld3q_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) +svfloat32x3_t svld3q_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) +svfloat16x3_t svld3q_f16(svbool_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) +svint32x3_t svld3q_s32(svbool_t, int32_t const *); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) +svint64x3_t svld3q_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_mf8))) +svmfloat8x3_t svld3q_mf8(svbool_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) +svint16x3_t svld3q_s16(svbool_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) +svuint8x3_t svld3q_vnum_u8(svbool_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) +svuint32x3_t svld3q_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) +svuint64x3_t svld3q_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) +svuint16x3_t svld3q_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) +svbfloat16x3_t svld3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) +svint8x3_t svld3q_vnum_s8(svbool_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) +svfloat64x3_t svld3q_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) +svfloat32x3_t svld3q_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) +svfloat16x3_t svld3q_vnum_f16(svbool_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) +svint32x3_t svld3q_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) +svint64x3_t svld3q_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_mf8))) +svmfloat8x3_t svld3q_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) +svint16x3_t svld3q_vnum_s16(svbool_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) +svuint8x4_t svld4q_u8(svbool_t, uint8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) +svuint32x4_t svld4q_u32(svbool_t, uint32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) +svuint64x4_t svld4q_u64(svbool_t, uint64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) +svuint16x4_t svld4q_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) +svbfloat16x4_t svld4q_bf16(svbool_t, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) +svint8x4_t svld4q_s8(svbool_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) +svfloat64x4_t svld4q_f64(svbool_t, float64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) +svfloat32x4_t svld4q_f32(svbool_t, float32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) +svfloat16x4_t svld4q_f16(svbool_t, float16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) +svint32x4_t 
svld4q_s32(svbool_t, int32_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) +svint64x4_t svld4q_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_mf8))) +svmfloat8x4_t svld4q_mf8(svbool_t, mfloat8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) +svint16x4_t svld4q_s16(svbool_t, int16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) +svuint8x4_t svld4q_vnum_u8(svbool_t, uint8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) +svuint32x4_t svld4q_vnum_u32(svbool_t, uint32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) +svuint64x4_t svld4q_vnum_u64(svbool_t, uint64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) +svuint16x4_t svld4q_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) +svbfloat16x4_t svld4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) +svint8x4_t svld4q_vnum_s8(svbool_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) +svfloat64x4_t svld4q_vnum_f64(svbool_t, float64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) +svfloat32x4_t svld4q_vnum_f32(svbool_t, float32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) +svfloat16x4_t svld4q_vnum_f16(svbool_t, float16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) +svint32x4_t svld4q_vnum_s32(svbool_t, int32_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) +svint64x4_t svld4q_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_mf8))) +svmfloat8x4_t svld4q_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) +svint16x4_t svld4q_vnum_s16(svbool_t, int16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) +float64x2_t svmaxnmqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) +float32x4_t svmaxnmqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) +float16x8_t svmaxnmqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) +float64x2_t svmaxqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) +float32x4_t svmaxqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) +float16x8_t svmaxqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) +int8x16_t svmaxqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) +int32x4_t svmaxqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) +int64x2_t svmaxqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) +int16x8_t 
svmaxqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) +uint8x16_t svmaxqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) +uint32x4_t svmaxqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) +uint64x2_t svmaxqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) +uint16x8_t svmaxqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) +float64x2_t svminnmqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) +float32x4_t svminnmqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) +float16x8_t svminnmqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) +float64x2_t svminqv_f64(svbool_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) +float32x4_t svminqv_f32(svbool_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) +float16x8_t svminqv_f16(svbool_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) +int8x16_t svminqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) +int32x4_t svminqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) +int64x2_t svminqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) +int16x8_t svminqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) +uint8x16_t svminqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) +uint32x4_t svminqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) +uint64x2_t svminqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) +uint16x8_t svminqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) +uint8x16_t svorqv_u8(svbool_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) +uint32x4_t svorqv_u32(svbool_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) +uint64x2_t svorqv_u64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) +uint16x8_t svorqv_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) +int8x16_t svorqv_s8(svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) +int32x4_t svorqv_s32(svbool_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) +int64x2_t svorqv_s64(svbool_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) +int16x8_t svorqv_s16(svbool_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) +svbool_t svpmov_u8(svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) +svbool_t svpmov_s8(svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) +svbool_t svpmov_u64(svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) +svbool_t svpmov_s64(svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) +svbool_t svpmov_u16(svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) +svbool_t svpmov_s16(svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) +svbool_t svpmov_u32(svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) +svbool_t svpmov_s32(svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) +svbool_t svpmov_lane_u8(svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) +svbool_t svpmov_lane_s8(svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) +svbool_t svpmov_lane_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) +svbool_t svpmov_lane_s64(svint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) +svbool_t svpmov_lane_u16(svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) +svbool_t svpmov_lane_s16(svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) +svbool_t svpmov_lane_u32(svuint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) +svbool_t svpmov_lane_s32(svint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) +svuint64_t svpmov_lane_u64_m(svuint64_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) +svint64_t svpmov_lane_s64_m(svint64_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) +svuint16_t svpmov_lane_u16_m(svuint16_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) +svint16_t svpmov_lane_s16_m(svint16_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) +svuint32_t svpmov_lane_u32_m(svuint32_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) +svint32_t svpmov_lane_s32_m(svint32_t, svbool_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8_z))) +svuint8_t svpmov_u8_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8_z))) +svint8_t svpmov_s8_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64_z))) +svuint64_t svpmov_u64_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64_z))) +svint64_t svpmov_s64_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16_z))) +svuint16_t svpmov_u16_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16_z))) +svint16_t svpmov_s16_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32_z))) +svuint32_t svpmov_u32_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32_z))) +svint32_t svpmov_s32_z(svbool_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) +void svst2q_u8(svbool_t, uint8_t const *, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) +void svst2q_u32(svbool_t, 
uint32_t const *, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) +void svst2q_u64(svbool_t, uint64_t const *, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) +void svst2q_u16(svbool_t, uint16_t const *, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) +void svst2q_bf16(svbool_t, bfloat16_t const *, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) +void svst2q_s8(svbool_t, int8_t const *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) +void svst2q_f64(svbool_t, float64_t const *, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) +void svst2q_f32(svbool_t, float32_t const *, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) +void svst2q_f16(svbool_t, float16_t const *, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) +void svst2q_s32(svbool_t, int32_t const *, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) +void svst2q_s64(svbool_t, int64_t const *, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_mf8))) +void svst2q_mf8(svbool_t, mfloat8_t const *, svmfloat8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) +void svst2q_s16(svbool_t, int16_t const *, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) +void svst2q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) +void svst2q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) +void svst2q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) +void svst2q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) +void svst2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) +void svst2q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) +void svst2q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) +void svst2q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) +void svst2q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) +void svst2q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) +void svst2q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_mf8))) +void svst2q_vnum_mf8(svbool_t, mfloat8_t const *, int64_t, svmfloat8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) +void svst2q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) +void svst3q_u8(svbool_t, uint8_t const *, svuint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) +void svst3q_u32(svbool_t, uint32_t const *, svuint32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) +void svst3q_u64(svbool_t, uint64_t const *, svuint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) +void svst3q_u16(svbool_t, uint16_t const *, svuint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) +void svst3q_bf16(svbool_t, bfloat16_t const *, svbfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) +void svst3q_s8(svbool_t, int8_t const *, svint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) +void svst3q_f64(svbool_t, float64_t const *, svfloat64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) +void svst3q_f32(svbool_t, float32_t const *, svfloat32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) +void svst3q_f16(svbool_t, float16_t const *, svfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) +void svst3q_s32(svbool_t, int32_t const *, svint32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) +void svst3q_s64(svbool_t, int64_t const *, svint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_mf8))) +void svst3q_mf8(svbool_t, mfloat8_t const *, svmfloat8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) +void svst3q_s16(svbool_t, int16_t const *, svint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) +void svst3q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) +void svst3q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) +void svst3q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) +void svst3q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) +void svst3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) +void svst3q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) +void svst3q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) +void svst3q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) +void svst3q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) +void svst3q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) +void svst3q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_mf8))) +void svst3q_vnum_mf8(svbool_t, 
mfloat8_t const *, int64_t, svmfloat8x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) +void svst3q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) +void svst4q_u8(svbool_t, uint8_t const *, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) +void svst4q_u32(svbool_t, uint32_t const *, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) +void svst4q_u64(svbool_t, uint64_t const *, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) +void svst4q_u16(svbool_t, uint16_t const *, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) +void svst4q_bf16(svbool_t, bfloat16_t const *, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) +void svst4q_s8(svbool_t, int8_t const *, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) +void svst4q_f64(svbool_t, float64_t const *, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) +void svst4q_f32(svbool_t, float32_t const *, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) +void svst4q_f16(svbool_t, float16_t const *, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) +void svst4q_s32(svbool_t, int32_t const *, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) +void svst4q_s64(svbool_t, int64_t const *, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_mf8))) +void svst4q_mf8(svbool_t, mfloat8_t const *, svmfloat8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) +void svst4q_s16(svbool_t, int16_t const *, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) +void svst4q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) +void svst4q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) +void svst4q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) +void svst4q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) +void svst4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) +void svst4q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) +void svst4q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) +void svst4q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) +void svst4q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) +void svst4q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) +void svst4q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_mf8))) +void svst4q_vnum_mf8(svbool_t, mfloat8_t const *, int64_t, svmfloat8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) +void svst4q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) +svuint8_t svtblq_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) +svuint32_t svtblq_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) +svuint64_t svtblq_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) +svuint16_t svtblq_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) +svbfloat16_t svtblq_bf16(svbfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) +svint8_t svtblq_s8(svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) +svfloat64_t svtblq_f64(svfloat64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) +svfloat32_t svtblq_f32(svfloat32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) +svfloat16_t svtblq_f16(svfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) +svint32_t svtblq_s32(svint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) +svint64_t svtblq_s64(svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_mf8))) +svmfloat8_t svtblq_mf8(svmfloat8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) +svint16_t svtblq_s16(svint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) +svuint8_t svtbxq_u8(svuint8_t, svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) +svuint32_t svtbxq_u32(svuint32_t, svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) +svuint64_t svtbxq_u64(svuint64_t, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) +svuint16_t svtbxq_u16(svuint16_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) +svbfloat16_t svtbxq_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) +svint8_t svtbxq_s8(svint8_t, svint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) +svfloat64_t svtbxq_f64(svfloat64_t, svfloat64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) +svfloat32_t svtbxq_f32(svfloat32_t, svfloat32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) +svfloat16_t svtbxq_f16(svfloat16_t, svfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) +svint32_t svtbxq_s32(svint32_t, svint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) +svint64_t svtbxq_s64(svint64_t, svint64_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_mf8))) +svmfloat8_t svtbxq_mf8(svmfloat8_t, svmfloat8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) +svint16_t svtbxq_s16(svint16_t, svint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) +svuint8_t svuzpq1_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) +svuint32_t svuzpq1_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) +svuint64_t svuzpq1_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) +svuint16_t svuzpq1_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) +svbfloat16_t svuzpq1_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) +svint8_t svuzpq1_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) +svfloat64_t svuzpq1_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) +svfloat32_t svuzpq1_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) +svfloat16_t svuzpq1_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) +svint32_t svuzpq1_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) +svint64_t svuzpq1_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_mf8))) +svmfloat8_t svuzpq1_mf8(svmfloat8_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) +svint16_t svuzpq1_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) +svuint8_t svuzpq2_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) +svuint32_t svuzpq2_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) +svuint64_t svuzpq2_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) +svuint16_t svuzpq2_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) +svbfloat16_t svuzpq2_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) +svint8_t svuzpq2_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) +svfloat64_t svuzpq2_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) +svfloat32_t svuzpq2_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) +svfloat16_t svuzpq2_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) +svint32_t svuzpq2_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) +svint64_t svuzpq2_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_mf8))) +svmfloat8_t svuzpq2_mf8(svmfloat8_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) +svint16_t svuzpq2_s16(svint16_t, svint16_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) +svuint8_t svzipq1_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) +svuint32_t svzipq1_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) +svuint64_t svzipq1_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) +svuint16_t svzipq1_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) +svbfloat16_t svzipq1_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) +svint8_t svzipq1_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) +svfloat64_t svzipq1_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) +svfloat32_t svzipq1_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) +svfloat16_t svzipq1_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) +svint32_t svzipq1_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) +svint64_t svzipq1_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_mf8))) +svmfloat8_t svzipq1_mf8(svmfloat8_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) +svint16_t svzipq1_s16(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) +svuint8_t svzipq2_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) +svuint32_t svzipq2_u32(svuint32_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) +svuint64_t svzipq2_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) +svuint16_t svzipq2_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) +svbfloat16_t svzipq2_bf16(svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) +svint8_t svzipq2_s8(svint8_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) +svfloat64_t svzipq2_f64(svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) +svfloat32_t svzipq2_f32(svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) +svfloat16_t svzipq2_f16(svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) +svint32_t svzipq2_s32(svint32_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) +svint64_t svzipq2_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8))) +svmfloat8_t svzipq2_mf8(svmfloat8_t, svmfloat8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) +svint16_t svzipq2_s16(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) +uint8x16_t svaddqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) +uint32x4_t svaddqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) 
+uint64x2_t svaddqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) +uint16x8_t svaddqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) +int8x16_t svaddqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) +int32x4_t svaddqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) +int64x2_t svaddqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) +int16x8_t svaddqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) +float64x2_t svaddqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) +float32x4_t svaddqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) +float16x8_t svaddqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) +uint8x16_t svandqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) +uint32x4_t svandqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) +uint64x2_t svandqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) +uint16x8_t svandqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) +int8x16_t svandqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) +int32x4_t svandqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) +int64x2_t svandqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) +int16x8_t svandqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) +svuint8_t svdup_laneq(svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) +svint8_t svdup_laneq(svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_mf8))) +svmfloat8_t svdup_laneq(svmfloat8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) +svuint64_t svdup_laneq(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) +svfloat64_t svdup_laneq(svfloat64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s64))) +svint64_t svdup_laneq(svint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u16))) +svuint16_t svdup_laneq(svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_bf16))) +svbfloat16_t svdup_laneq(svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f16))) +svfloat16_t svdup_laneq(svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s16))) +svint16_t svdup_laneq(svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u32))) +svuint32_t svdup_laneq(svuint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f32))) +svfloat32_t svdup_laneq(svfloat32_t, uint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s32))) +svint32_t svdup_laneq(svint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) +uint8x16_t sveorqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) +uint32x4_t sveorqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) +uint64x2_t sveorqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) +uint16x8_t sveorqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) +int8x16_t sveorqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) +int32x4_t sveorqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) +int64x2_t sveorqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) +int16x8_t sveorqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) +svuint8_t svextq(svuint8_t, svuint8_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) +svuint32_t svextq(svuint32_t, svuint32_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) +svuint64_t svextq(svuint64_t, svuint64_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) +svuint16_t svextq(svuint16_t, svuint16_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) +svbfloat16_t svextq(svbfloat16_t, svbfloat16_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) +svint8_t svextq(svint8_t, svint8_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) +svfloat64_t svextq(svfloat64_t, svfloat64_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) +svfloat32_t svextq(svfloat32_t, svfloat32_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) +svfloat16_t svextq(svfloat16_t, svfloat16_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) +svint32_t svextq(svint32_t, svint32_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) +svint64_t svextq(svint64_t, svint64_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_mf8))) +svmfloat8_t svextq(svmfloat8_t, svmfloat8_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) +svint16_t svextq(svint16_t, svint16_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) +svuint8x2_t svld2q(svbool_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) +svuint32x2_t svld2q(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) +svuint64x2_t svld2q(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) +svuint16x2_t svld2q(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) +svbfloat16x2_t svld2q(svbool_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) +svint8x2_t svld2q(svbool_t, int8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) +svfloat64x2_t svld2q(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) +svfloat32x2_t svld2q(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) +svfloat16x2_t svld2q(svbool_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) +svint32x2_t svld2q(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) +svint64x2_t svld2q(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_mf8))) +svmfloat8x2_t svld2q(svbool_t, mfloat8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) +svint16x2_t svld2q(svbool_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) +svuint8x2_t svld2q_vnum(svbool_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) +svuint32x2_t svld2q_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) +svuint64x2_t svld2q_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) +svuint16x2_t svld2q_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) +svbfloat16x2_t svld2q_vnum(svbool_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) +svint8x2_t svld2q_vnum(svbool_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) +svfloat64x2_t svld2q_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) +svfloat32x2_t svld2q_vnum(svbool_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) +svfloat16x2_t svld2q_vnum(svbool_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) +svint32x2_t svld2q_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) +svint64x2_t svld2q_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_mf8))) +svmfloat8x2_t svld2q_vnum(svbool_t, mfloat8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) +svint16x2_t svld2q_vnum(svbool_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) +svuint8x3_t svld3q(svbool_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) +svuint32x3_t svld3q(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) +svuint64x3_t svld3q(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) +svuint16x3_t svld3q(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) +svbfloat16x3_t svld3q(svbool_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) +svint8x3_t svld3q(svbool_t, int8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) +svfloat64x3_t svld3q(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) +svfloat32x3_t svld3q(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) +svfloat16x3_t svld3q(svbool_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) +svint32x3_t svld3q(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) +svint64x3_t svld3q(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_mf8))) +svmfloat8x3_t svld3q(svbool_t, mfloat8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) +svint16x3_t svld3q(svbool_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) +svuint8x3_t svld3q_vnum(svbool_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) +svuint32x3_t svld3q_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) +svuint64x3_t svld3q_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) +svuint16x3_t svld3q_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) +svbfloat16x3_t svld3q_vnum(svbool_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) +svint8x3_t svld3q_vnum(svbool_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) +svfloat64x3_t svld3q_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) +svfloat32x3_t svld3q_vnum(svbool_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) +svfloat16x3_t svld3q_vnum(svbool_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) +svint32x3_t svld3q_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) +svint64x3_t svld3q_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_mf8))) +svmfloat8x3_t svld3q_vnum(svbool_t, mfloat8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) +svint16x3_t svld3q_vnum(svbool_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) +svuint8x4_t svld4q(svbool_t, uint8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) +svuint32x4_t svld4q(svbool_t, uint32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) +svuint64x4_t svld4q(svbool_t, uint64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) +svuint16x4_t svld4q(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) +svbfloat16x4_t svld4q(svbool_t, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) +svint8x4_t svld4q(svbool_t, int8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) +svfloat64x4_t svld4q(svbool_t, float64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) +svfloat32x4_t svld4q(svbool_t, float32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) +svfloat16x4_t svld4q(svbool_t, float16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) +svint32x4_t svld4q(svbool_t, int32_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) +svint64x4_t svld4q(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_mf8))) +svmfloat8x4_t svld4q(svbool_t, mfloat8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) +svint16x4_t svld4q(svbool_t, int16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) +svuint8x4_t svld4q_vnum(svbool_t, uint8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) +svuint32x4_t svld4q_vnum(svbool_t, uint32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) +svuint64x4_t svld4q_vnum(svbool_t, uint64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) +svuint16x4_t svld4q_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) +svbfloat16x4_t svld4q_vnum(svbool_t, bfloat16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) +svint8x4_t svld4q_vnum(svbool_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) +svfloat64x4_t svld4q_vnum(svbool_t, float64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) +svfloat32x4_t svld4q_vnum(svbool_t, float32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) +svfloat16x4_t svld4q_vnum(svbool_t, float16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) +svint32x4_t svld4q_vnum(svbool_t, int32_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) +svint64x4_t svld4q_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_mf8))) +svmfloat8x4_t svld4q_vnum(svbool_t, mfloat8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) +svint16x4_t svld4q_vnum(svbool_t, int16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) +float64x2_t svmaxnmqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) +float32x4_t svmaxnmqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) +float16x8_t svmaxnmqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) +float64x2_t svmaxqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) +float32x4_t svmaxqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) +float16x8_t svmaxqv(svbool_t, svfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) +int8x16_t svmaxqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) +int32x4_t svmaxqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) +int64x2_t svmaxqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) +int16x8_t svmaxqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) +uint8x16_t svmaxqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) +uint32x4_t svmaxqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) +uint64x2_t svmaxqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) +uint16x8_t svmaxqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) +float64x2_t svminnmqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) +float32x4_t svminnmqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) +float16x8_t svminnmqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) +float64x2_t svminqv(svbool_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) +float32x4_t svminqv(svbool_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) +float16x8_t svminqv(svbool_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) +int8x16_t svminqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) +int32x4_t svminqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) +int64x2_t svminqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) +int16x8_t svminqv(svbool_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) +uint8x16_t svminqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) +uint32x4_t svminqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) +uint64x2_t svminqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) +uint16x8_t svminqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) +uint8x16_t svorqv(svbool_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) +uint32x4_t svorqv(svbool_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) +uint64x2_t svorqv(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) +uint16x8_t svorqv(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) +int8x16_t svorqv(svbool_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) +int32x4_t svorqv(svbool_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) +int64x2_t svorqv(svbool_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) +int16x8_t svorqv(svbool_t, 
svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) +svbool_t svpmov(svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) +svbool_t svpmov(svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) +svbool_t svpmov(svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) +svbool_t svpmov(svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) +svbool_t svpmov(svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) +svbool_t svpmov(svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) +svbool_t svpmov(svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) +svbool_t svpmov(svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) +svbool_t svpmov_lane(svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) +svbool_t svpmov_lane(svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) +svbool_t svpmov_lane(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) +svbool_t svpmov_lane(svint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) +svbool_t svpmov_lane(svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) +svbool_t svpmov_lane(svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) +svbool_t svpmov_lane(svuint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) +svbool_t svpmov_lane(svint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) +svuint64_t svpmov_lane_m(svuint64_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) +svint64_t svpmov_lane_m(svint64_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) +svuint16_t svpmov_lane_m(svuint16_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) +svint16_t svpmov_lane_m(svint16_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) +svuint32_t svpmov_lane_m(svuint32_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) +svint32_t svpmov_lane_m(svint32_t, svbool_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) +void svst2q(svbool_t, uint8_t const *, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) +void svst2q(svbool_t, uint32_t const *, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) +void svst2q(svbool_t, uint64_t const *, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) +void svst2q(svbool_t, uint16_t const *, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) +void svst2q(svbool_t, bfloat16_t const *, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) +void svst2q(svbool_t, int8_t const *, svint8x2_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) +void svst2q(svbool_t, float64_t const *, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) +void svst2q(svbool_t, float32_t const *, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) +void svst2q(svbool_t, float16_t const *, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) +void svst2q(svbool_t, int32_t const *, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) +void svst2q(svbool_t, int64_t const *, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_mf8))) +void svst2q(svbool_t, mfloat8_t const *, svmfloat8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) +void svst2q(svbool_t, int16_t const *, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) +void svst2q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) +void svst2q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) +void svst2q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) +void svst2q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) +void svst2q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) +void svst2q_vnum(svbool_t, int8_t const *, int64_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) +void svst2q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) +void svst2q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) +void svst2q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) +void svst2q_vnum(svbool_t, int32_t const *, int64_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) +void svst2q_vnum(svbool_t, int64_t const *, int64_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_mf8))) +void svst2q_vnum(svbool_t, mfloat8_t const *, int64_t, svmfloat8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) +void svst2q_vnum(svbool_t, int16_t const *, int64_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) +void svst3q(svbool_t, uint8_t const *, svuint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) +void svst3q(svbool_t, uint32_t const *, svuint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) +void svst3q(svbool_t, uint64_t const *, svuint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) +void svst3q(svbool_t, uint16_t const *, svuint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) +void svst3q(svbool_t, bfloat16_t const *, svbfloat16x3_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) +void svst3q(svbool_t, int8_t const *, svint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) +void svst3q(svbool_t, float64_t const *, svfloat64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) +void svst3q(svbool_t, float32_t const *, svfloat32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) +void svst3q(svbool_t, float16_t const *, svfloat16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) +void svst3q(svbool_t, int32_t const *, svint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) +void svst3q(svbool_t, int64_t const *, svint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_mf8))) +void svst3q(svbool_t, mfloat8_t const *, svmfloat8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) +void svst3q(svbool_t, int16_t const *, svint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) +void svst3q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) +void svst3q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) +void svst3q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) +void svst3q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) +void svst3q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) +void svst3q_vnum(svbool_t, int8_t const *, int64_t, svint8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) +void svst3q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) +void svst3q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) +void svst3q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) +void svst3q_vnum(svbool_t, int32_t const *, int64_t, svint32x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) +void svst3q_vnum(svbool_t, int64_t const *, int64_t, svint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_mf8))) +void svst3q_vnum(svbool_t, mfloat8_t const *, int64_t, svmfloat8x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) +void svst3q_vnum(svbool_t, int16_t const *, int64_t, svint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) +void svst4q(svbool_t, uint8_t const *, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) +void svst4q(svbool_t, uint32_t const *, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) +void svst4q(svbool_t, uint64_t const *, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) +void svst4q(svbool_t, uint16_t const *, svuint16x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) +void svst4q(svbool_t, bfloat16_t const *, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) +void svst4q(svbool_t, int8_t const *, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) +void svst4q(svbool_t, float64_t const *, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) +void svst4q(svbool_t, float32_t const *, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) +void svst4q(svbool_t, float16_t const *, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) +void svst4q(svbool_t, int32_t const *, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) +void svst4q(svbool_t, int64_t const *, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_mf8))) +void svst4q(svbool_t, mfloat8_t const *, svmfloat8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) +void svst4q(svbool_t, int16_t const *, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) +void svst4q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) +void svst4q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) +void svst4q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) +void svst4q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) +void svst4q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) +void svst4q_vnum(svbool_t, int8_t const *, int64_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) +void svst4q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) +void svst4q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) +void svst4q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) +void svst4q_vnum(svbool_t, int32_t const *, int64_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) +void svst4q_vnum(svbool_t, int64_t const *, int64_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_mf8))) +void svst4q_vnum(svbool_t, mfloat8_t const *, int64_t, svmfloat8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) +void svst4q_vnum(svbool_t, int16_t const *, int64_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) +svuint8_t svtblq(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) +svuint32_t svtblq(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) +svuint64_t svtblq(svuint64_t, svuint64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) +svuint16_t svtblq(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) +svbfloat16_t svtblq(svbfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) +svint8_t svtblq(svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) +svfloat64_t svtblq(svfloat64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) +svfloat32_t svtblq(svfloat32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) +svfloat16_t svtblq(svfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) +svint32_t svtblq(svint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) +svint64_t svtblq(svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_mf8))) +svmfloat8_t svtblq(svmfloat8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) +svint16_t svtblq(svint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) +svuint8_t svtbxq(svuint8_t, svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) +svuint32_t svtbxq(svuint32_t, svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) +svuint64_t svtbxq(svuint64_t, svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) +svuint16_t svtbxq(svuint16_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) +svbfloat16_t svtbxq(svbfloat16_t, svbfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) +svint8_t svtbxq(svint8_t, svint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) +svfloat64_t svtbxq(svfloat64_t, svfloat64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) +svfloat32_t svtbxq(svfloat32_t, svfloat32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) +svfloat16_t svtbxq(svfloat16_t, svfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) +svint32_t svtbxq(svint32_t, svint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) +svint64_t svtbxq(svint64_t, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_mf8))) +svmfloat8_t svtbxq(svmfloat8_t, svmfloat8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) +svint16_t svtbxq(svint16_t, svint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) +svuint8_t svuzpq1(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) +svuint32_t svuzpq1(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) +svuint64_t svuzpq1(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) +svuint16_t svuzpq1(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) +svbfloat16_t svuzpq1(svbfloat16_t, svbfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) +svint8_t svuzpq1(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) +svfloat64_t svuzpq1(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) +svfloat32_t svuzpq1(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) +svfloat16_t svuzpq1(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) +svint32_t svuzpq1(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) +svint64_t svuzpq1(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_mf8))) +svmfloat8_t svuzpq1(svmfloat8_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) +svint16_t svuzpq1(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) +svuint8_t svuzpq2(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) +svuint32_t svuzpq2(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) +svuint64_t svuzpq2(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) +svuint16_t svuzpq2(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) +svbfloat16_t svuzpq2(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) +svint8_t svuzpq2(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) +svfloat64_t svuzpq2(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) +svfloat32_t svuzpq2(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) +svfloat16_t svuzpq2(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) +svint32_t svuzpq2(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) +svint64_t svuzpq2(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_mf8))) +svmfloat8_t svuzpq2(svmfloat8_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) +svint16_t svuzpq2(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) +svuint8_t svzipq1(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) +svuint32_t svzipq1(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) +svuint64_t svzipq1(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) +svuint16_t svzipq1(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) +svbfloat16_t svzipq1(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) +svint8_t svzipq1(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) +svfloat64_t svzipq1(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) +svfloat32_t svzipq1(svfloat32_t, svfloat32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) +svfloat16_t svzipq1(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) +svint32_t svzipq1(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) +svint64_t svzipq1(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_mf8))) +svmfloat8_t svzipq1(svmfloat8_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) +svint16_t svzipq1(svint16_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) +svuint8_t svzipq2(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) +svuint32_t svzipq2(svuint32_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) +svuint64_t svzipq2(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) +svuint16_t svzipq2(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) +svbfloat16_t svzipq2(svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) +svint8_t svzipq2(svint8_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) +svfloat64_t svzipq2(svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) +svfloat32_t svzipq2(svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) +svfloat16_t svzipq2(svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) +svint32_t svzipq2(svint32_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) +svint64_t svzipq2(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8))) +svmfloat8_t svzipq2(svmfloat8_t, svmfloat8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) +svint16_t svzipq2(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) svfloat32_t svbfmmla_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) -svbfloat16_t svldff1_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) -svbfloat16_t svldff1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) -svbfloat16_t svldnf1_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) -svbfloat16_t svldnf1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmmla_f32))) svfloat32_t svbfmmla(svfloat32_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_bf16))) -svbfloat16_t svldff1(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldff1_vnum_bf16))) -svbfloat16_t svldff1_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_bf16))) -svbfloat16_t svldnf1(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnf1_vnum_bf16))) -svbfloat16_t 
svldnf1_vnum(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) -svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) -svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) -svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) -svbfloat16_t svuzp2q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) -svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) -svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) -svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) -svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) -svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) -svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) -svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) -svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) svfloat32_t svbfdot_n_f32(svfloat32_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) @@ -6955,26 +8799,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) svfloat32_t svbfmlalt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) svfloat32_t svbfmlalt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) -bfloat16_t svclasta_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) -svbfloat16_t svclasta_bf16(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) -bfloat16_t svclastb_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) -svbfloat16_t svclastb_bf16(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) -svuint16_t svcnt_bf16_m(svuint16_t, svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) -svuint16_t svcnt_bf16_x(svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) -svuint16_t svcnt_bf16_z(svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) -svbfloat16x2_t svcreate2_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) -svbfloat16x3_t svcreate3_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) -svbfloat16x4_t 
svcreate4_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) svbfloat16_t svcvt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) @@ -6983,112 +8807,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) svbfloat16_t svcvt_bf16_f32_z(svbool_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) svbfloat16_t svcvtnt_bf16_f32_m(svbfloat16_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) -svbfloat16_t svdup_n_bf16(bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) -svbfloat16_t svdup_n_bf16_m(svbfloat16_t, svbool_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) -svbfloat16_t svdup_n_bf16_x(svbool_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) -svbfloat16_t svdup_n_bf16_z(svbool_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) -svbfloat16_t svdup_lane_bf16(svbfloat16_t, uint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) -svbfloat16_t svdupq_n_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) -svbfloat16_t svdupq_lane_bf16(svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) -svbfloat16_t svext_bf16(svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) -svbfloat16_t svget2_bf16(svbfloat16x2_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) -svbfloat16_t svget3_bf16(svbfloat16x3_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) -svbfloat16_t svget4_bf16(svbfloat16x4_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) -svbfloat16_t svinsr_n_bf16(svbfloat16_t, bfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) -bfloat16_t svlasta_bf16(svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) -bfloat16_t svlastb_bf16(svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) -svbfloat16_t svld1_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) -svbfloat16_t svld1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) -svbfloat16_t svld1rq_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) -svbfloat16x2_t svld2_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) -svbfloat16x2_t svld2_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) -svbfloat16x3_t svld3_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) -svbfloat16x3_t svld3_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) -svbfloat16x4_t svld4_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) -svbfloat16x4_t svld4_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) -svbfloat16_t svldnt1_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) -svbfloat16_t svldnt1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) -uint64_t svlen_bf16(svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) -svbfloat16_t svrev_bf16(svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) -svbfloat16_t svsel_bf16(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) -svbfloat16x2_t svset2_bf16(svbfloat16x2_t, uint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) -svbfloat16x3_t svset3_bf16(svbfloat16x3_t, uint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) -svbfloat16x4_t svset4_bf16(svbfloat16x4_t, uint64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) -svbfloat16_t svsplice_bf16(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) -void svst1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) -void svst1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) -void svst2_bf16(svbool_t, bfloat16_t *, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) -void svst2_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) -void svst3_bf16(svbool_t, bfloat16_t *, svbfloat16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) -void svst3_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) -void svst4_bf16(svbool_t, bfloat16_t *, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) -void svst4_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) -void svstnt1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) -void svstnt1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) -svbfloat16_t svtbl_bf16(svbfloat16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) -svbfloat16_t svtrn1_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) -svbfloat16_t svtrn2_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_bf16))) -svbfloat16x2_t svundef2_bf16(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_bf16))) -svbfloat16x3_t 
svundef3_bf16(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_bf16))) -svbfloat16x4_t svundef4_bf16(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_bf16))) -svbfloat16_t svundef_bf16(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) -svbfloat16_t svuzp1_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) -svbfloat16_t svuzp2_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) -svbfloat16_t svzip1_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) -svbfloat16_t svzip2_bf16(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_n_f32))) svfloat32_t svbfdot(svfloat32_t, svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfdot_f32))) @@ -7107,26 +8825,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_f32))) svfloat32_t svbfmlalt(svfloat32_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlalt_lane_f32))) svfloat32_t svbfmlalt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) -bfloat16_t svclasta(svbool_t, bfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) -svbfloat16_t svclasta(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) -bfloat16_t svclastb(svbool_t, bfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) -svbfloat16_t svclastb(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) -svuint16_t svcnt_m(svuint16_t, svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) -svuint16_t svcnt_x(svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) -svuint16_t svcnt_z(svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) -svbfloat16x2_t svcreate2(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) -svbfloat16x3_t svcreate3(svbfloat16_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) -svbfloat16x4_t svcreate4(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_m))) svbfloat16_t svcvt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_x))) @@ -7135,104 +8833,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_bf16_f32_z))) svbfloat16_t svcvt_bf16_z(svbool_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_bf16_f32_m))) svbfloat16_t svcvtnt_bf16_m(svbfloat16_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) -svbfloat16_t svdup_bf16(bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) -svbfloat16_t svdup_bf16_m(svbfloat16_t, svbool_t, bfloat16_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) -svbfloat16_t svdup_bf16_x(svbool_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) -svbfloat16_t svdup_bf16_z(svbool_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) -svbfloat16_t svdup_lane(svbfloat16_t, uint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) -svbfloat16_t svdupq_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) -svbfloat16_t svdupq_lane(svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) -svbfloat16_t svext(svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) -svbfloat16_t svget2(svbfloat16x2_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) -svbfloat16_t svget3(svbfloat16x3_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) -svbfloat16_t svget4(svbfloat16x4_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) -svbfloat16_t svinsr(svbfloat16_t, bfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) -bfloat16_t svlasta(svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) -bfloat16_t svlastb(svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) -svbfloat16_t svld1(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) -svbfloat16_t svld1_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) -svbfloat16_t svld1rq(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) -svbfloat16x2_t svld2(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) -svbfloat16x2_t svld2_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) -svbfloat16x3_t svld3(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) -svbfloat16x3_t svld3_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) -svbfloat16x4_t svld4(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) -svbfloat16x4_t svld4_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) -svbfloat16_t svldnt1(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) -svbfloat16_t svldnt1_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) -uint64_t svlen(svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) -svbfloat16_t svrev(svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) -svbfloat16_t svsel(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) 
-svbfloat16x2_t svset2(svbfloat16x2_t, uint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) -svbfloat16x3_t svset3(svbfloat16x3_t, uint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) -svbfloat16x4_t svset4(svbfloat16x4_t, uint64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) -svbfloat16_t svsplice(svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) -void svst1(svbool_t, bfloat16_t *, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) -void svst1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) -void svst2(svbool_t, bfloat16_t *, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) -void svst2_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) -void svst3(svbool_t, bfloat16_t *, svbfloat16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) -void svst3_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) -void svst4(svbool_t, bfloat16_t *, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) -void svst4_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) -void svstnt1(svbool_t, bfloat16_t *, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) -void svstnt1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) -svbfloat16_t svtbl(svbfloat16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) -svbfloat16_t svtrn1(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) -svbfloat16_t svtrn2(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) -svbfloat16_t svuzp1(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) -svbfloat16_t svuzp2(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) -svbfloat16_t svzip1(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) -svbfloat16_t svzip2(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) svfloat32_t svmmla_f32(svfloat32_t, svfloat32_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f32))) @@ -7245,6 +8845,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) svuint64_t svld1ro_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) svuint16_t svld1ro_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) +svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) svint8_t svld1ro_s8(svbool_t, int8_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) @@ -7257,6 +8859,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) svint32_t svld1ro_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) svint64_t svld1ro_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8))) +svmfloat8_t svld1ro_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) svint16_t svld1ro_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) @@ -7269,6 +8873,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) svuint64_t svtrn1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) svuint16_t svtrn1q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) +svbfloat16_t svtrn1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) svint8_t svtrn1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) @@ -7291,6 +8897,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) svuint64_t svtrn2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) svuint16_t svtrn2q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) +svbfloat16_t svtrn2q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) svint8_t svtrn2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) @@ -7313,6 +8921,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) svuint64_t svuzp1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) svuint16_t svuzp1q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) +svbfloat16_t svuzp1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) svint8_t svuzp1q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) @@ -7335,6 +8945,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) svuint64_t svuzp2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) svuint16_t svuzp2q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) +svbfloat16_t svuzp2q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) svint8_t svuzp2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) @@ -7357,6 +8969,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) svuint64_t svzip1q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) svuint16_t svzip1q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) +svbfloat16_t svzip1q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) svint8_t svzip1q_s8(svint8_t, svint8_t); 
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) @@ -7379,6 +8993,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) svuint64_t svzip2q_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) svuint16_t svzip2q_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) +svbfloat16_t svzip2q_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) svint8_t svzip2q_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) @@ -7401,6 +9017,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u64))) svuint64_t svld1ro(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_u16))) svuint16_t svld1ro(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) +svbfloat16_t svld1ro(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s8))) svint8_t svld1ro(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_f64))) @@ -7413,6 +9031,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s32))) svint32_t svld1ro(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s64))) svint64_t svld1ro(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_mf8))) +svmfloat8_t svld1ro(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_s16))) svint16_t svld1ro(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_f64))) @@ -7425,6 +9045,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u64))) svuint64_t svtrn1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_u16))) svuint16_t svtrn1q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_bf16))) +svbfloat16_t svtrn1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_s8))) svint8_t svtrn1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1q_f64))) @@ -7447,6 +9069,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u64))) svuint64_t svtrn2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_u16))) svuint16_t svtrn2q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_bf16))) +svbfloat16_t svtrn2q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_s8))) svint8_t svtrn2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2q_f64))) @@ -7469,6 +9093,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u64))) svuint64_t svuzp1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_u16))) svuint16_t svuzp1q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_bf16))) +svbfloat16_t svuzp1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_s8))) svint8_t svuzp1q(svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1q_f64))) @@ -7491,6 +9117,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u64))) svuint64_t svuzp2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_u16))) svuint16_t svuzp2q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_bf16))) +svbfloat16_t svuzp2q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_s8))) svint8_t svuzp2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2q_f64))) @@ -7513,6 +9141,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u64))) svuint64_t svzip1q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_u16))) svuint16_t svzip1q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_bf16))) +svbfloat16_t svzip1q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_s8))) svint8_t svzip1q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1q_f64))) @@ -7535,6 +9165,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u64))) svuint64_t svzip2q(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_u16))) svuint16_t svzip2q(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_bf16))) +svbfloat16_t svzip2q(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s8))) svint8_t svzip2q(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_f64))) @@ -7549,10 +9181,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s64))) svint64_t svzip2q(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2q_s16))) svint16_t svzip2q(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) -svbfloat16_t svld1ro_bf16(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1ro_bf16))) -svbfloat16_t svld1ro(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_s32))) svint32_t svmmla_s32(svint32_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmmla_u32))) @@ -7589,6 +9217,14 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_s32))) svint32_t svusdot(svint32_t, svuint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svusdot_lane_s32))) svint32_t svusdot_lane(svint32_t, svuint8_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) +svuint64_t svrax1_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) +svint64_t svrax1_s64(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) +svuint64_t svrax1(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) +svint64_t svrax1(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u32_z))) svuint32_t svhistcnt_u32_z(svbool_t, svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svhistcnt_u64_z))) @@ -8509,22 +10145,6 @@ 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u6 void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) -svbfloat16_t svtbl2_bf16(svbfloat16x2_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) -svbfloat16_t svtbx_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) -svbool_t svwhilerw_bf16(bfloat16_t const *, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) -svbool_t svwhilewr_bf16(bfloat16_t const *, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) -svbfloat16_t svtbl2(svbfloat16x2_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) -svbfloat16_t svtbx(svbfloat16_t, svbfloat16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) -svbool_t svwhilerw(bfloat16_t const *, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) -svbool_t svwhilewr(bfloat16_t const *, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m))) svfloat64_t svamax_n_f64_m(svbool_t, svfloat64_t, float64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m))) @@ -8813,24 +10433,14 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_ svmfloat8_t svcvtnb_mf8_fpm(svfloat32x2_t, fpm_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm))) svmfloat8_t svcvtnt_mf8_fpm(svmfloat8_t, svfloat32x2_t, fpm_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) -svbfloat16_t svluti2_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) -svbfloat16_t svluti4_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) -svbfloat16_t svluti4_lane_bf16_x2(svbfloat16x2_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) -svbfloat16_t svluti2_lane(svbfloat16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) -svbfloat16_t svluti4_lane(svbfloat16_t, svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) -svbfloat16_t svluti4_lane(svbfloat16x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8))) svuint8_t svluti2_lane_u8(svuint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) svint8_t svluti2_lane_s8(svint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) svuint16_t svluti2_lane_u16(svuint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) +svbfloat16_t svluti2_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) svfloat16_t svluti2_lane_f16(svfloat16_t, 
svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) @@ -8841,12 +10451,16 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) svint8_t svluti4_lane_s8(svint8_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) svuint16_t svluti4_lane_u16(svuint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) +svbfloat16_t svluti4_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) svfloat16_t svluti4_lane_f16(svfloat16_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) svint16_t svluti4_lane_s16(svint16_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) svuint16_t svluti4_lane_u16_x2(svuint16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) +svbfloat16_t svluti4_lane_bf16_x2(svbfloat16x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) svfloat16_t svluti4_lane_f16_x2(svfloat16x2_t, svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) @@ -8857,6 +10471,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) svint8_t svluti2_lane(svint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) svuint16_t svluti2_lane(svuint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) +svbfloat16_t svluti2_lane(svbfloat16_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) svfloat16_t svluti2_lane(svfloat16_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) @@ -8867,12 +10483,16 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) svint8_t svluti4_lane(svint8_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) svuint16_t svluti4_lane(svuint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) +svbfloat16_t svluti4_lane(svbfloat16_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) svfloat16_t svluti4_lane(svfloat16_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) svint16_t svluti4_lane(svint16_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) svuint16_t svluti4_lane(svuint16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) +svbfloat16_t svluti4_lane(svbfloat16x2_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) svfloat16_t svluti4_lane(svfloat16x2_t, svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) @@ -9237,14 +10857,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u64))) svuint64_t svbgrp(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbgrp_u16))) svuint16_t svbgrp(svuint16_t, svuint16_t); 
-__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) -svuint64_t svrax1_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) -svint64_t svrax1_s64(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_u64))) -svuint64_t svrax1(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrax1_s64))) -svint64_t svrax1(svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) svuint32_t svsm4e_u32(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) @@ -9253,86 +10865,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4e_u32))) svuint32_t svsm4e(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsm4ekey_u32))) svuint32_t svsm4ekey(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) -uint8x16_t svaddqv_u8(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) -uint32x4_t svaddqv_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) -uint64x2_t svaddqv_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) -uint16x8_t svaddqv_u16(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) -int8x16_t svaddqv_s8(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) -int32x4_t svaddqv_s32(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) -int64x2_t svaddqv_s64(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) -int16x8_t svaddqv_s16(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) -float64x2_t svaddqv_f64(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) -float32x4_t svaddqv_f32(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) -float16x8_t svaddqv_f16(svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) -uint8x16_t svandqv_u8(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) -uint32x4_t svandqv_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) -uint64x2_t svandqv_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) -uint16x8_t svandqv_u16(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) -int8x16_t svandqv_s8(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) -int32x4_t svandqv_s32(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) -int64x2_t svandqv_s64(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) -int16x8_t svandqv_s16(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) -uint8x16_t sveorqv_u8(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) -uint32x4_t sveorqv_u32(svbool_t, svuint32_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) -uint64x2_t sveorqv_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) -uint16x8_t sveorqv_u16(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) -int8x16_t sveorqv_s8(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) -int32x4_t sveorqv_s32(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) -int64x2_t sveorqv_s64(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) -int16x8_t sveorqv_s16(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) -svuint8_t svextq_u8(svuint8_t, svuint8_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) -svuint32_t svextq_u32(svuint32_t, svuint32_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) -svuint64_t svextq_u64(svuint64_t, svuint64_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) -svuint16_t svextq_u16(svuint16_t, svuint16_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) -svbfloat16_t svextq_bf16(svbfloat16_t, svbfloat16_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) -svint8_t svextq_s8(svint8_t, svint8_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) -svfloat64_t svextq_f64(svfloat64_t, svfloat64_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) -svfloat32_t svextq_f32(svfloat32_t, svfloat32_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) -svfloat16_t svextq_f16(svfloat16_t, svfloat16_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) -svint32_t svextq_s32(svint32_t, svint32_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) -svint64_t svextq_s64(svint64_t, svint64_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_mf8))) -svmfloat8_t svextq_mf8(svmfloat8_t, svmfloat8_t, int32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) -svint16_t svextq_s16(svint16_t, svint16_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) svuint32_t svld1q_gather_u64base_index_u32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) @@ -9375,6 +10907,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64bas svint32_t svld1q_gather_u64base_offset_s32(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) svint64_t svld1q_gather_u64base_offset_s64(svbool_t, svuint64_t, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8))) +svmfloat8_t svld1q_gather_u64base_offset_mf8(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) svint16_t svld1q_gather_u64base_offset_s16(svbool_t, svuint64_t, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) @@ -9399,6 +10933,8 @@ __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64bas svint32_t svld1q_gather_u64base_s32(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) svint64_t svld1q_gather_u64base_s64(svbool_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8))) +svmfloat8_t svld1q_gather_u64base_mf8(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) svint16_t svld1q_gather_u64base_s16(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) @@ -9443,6 +10979,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64off svint32_t svld1q_gather_u64offset_s32(svbool_t, int32_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) svint64_t svld1q_gather_u64offset_s64(svbool_t, int64_t const *, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8))) +svmfloat8_t svld1q_gather_u64offset_mf8(svbool_t, mfloat8_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) svint16_t svld1q_gather_u64offset_s16(svbool_t, int16_t const *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) @@ -9469,282 +11007,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32))) svfloat32_t svld1uwq_vnum_f32(svbool_t, float32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) svint32_t svld1uwq_vnum_s32(svbool_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) -svuint8x2_t svld2q_u8(svbool_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) -svuint32x2_t svld2q_u32(svbool_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) -svuint64x2_t svld2q_u64(svbool_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) -svuint16x2_t svld2q_u16(svbool_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) -svint8x2_t svld2q_s8(svbool_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) -svfloat64x2_t svld2q_f64(svbool_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) -svfloat32x2_t svld2q_f32(svbool_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) -svfloat16x2_t svld2q_f16(svbool_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) -svint32x2_t svld2q_s32(svbool_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) -svint64x2_t svld2q_s64(svbool_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) -svint16x2_t svld2q_s16(svbool_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) -svbfloat16x2_t svld2q_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) -svuint8x2_t svld2q_vnum_u8(svbool_t, uint8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) -svuint32x2_t 
svld2q_vnum_u32(svbool_t, uint32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) -svuint64x2_t svld2q_vnum_u64(svbool_t, uint64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) -svuint16x2_t svld2q_vnum_u16(svbool_t, uint16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) -svint8x2_t svld2q_vnum_s8(svbool_t, int8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) -svfloat64x2_t svld2q_vnum_f64(svbool_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) -svfloat32x2_t svld2q_vnum_f32(svbool_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) -svfloat16x2_t svld2q_vnum_f16(svbool_t, float16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) -svint32x2_t svld2q_vnum_s32(svbool_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) -svint64x2_t svld2q_vnum_s64(svbool_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) -svint16x2_t svld2q_vnum_s16(svbool_t, int16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) -svbfloat16x2_t svld2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) -svuint8x3_t svld3q_u8(svbool_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) -svuint32x3_t svld3q_u32(svbool_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) -svuint64x3_t svld3q_u64(svbool_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) -svuint16x3_t svld3q_u16(svbool_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) -svint8x3_t svld3q_s8(svbool_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) -svfloat64x3_t svld3q_f64(svbool_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) -svfloat32x3_t svld3q_f32(svbool_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) -svfloat16x3_t svld3q_f16(svbool_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) -svint32x3_t svld3q_s32(svbool_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) -svint64x3_t svld3q_s64(svbool_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) -svint16x3_t svld3q_s16(svbool_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) -svbfloat16x3_t svld3q_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) -svuint8x3_t svld3q_vnum_u8(svbool_t, uint8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) -svuint32x3_t svld3q_vnum_u32(svbool_t, uint32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) -svuint64x3_t svld3q_vnum_u64(svbool_t, uint64_t const *, int64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) -svuint16x3_t svld3q_vnum_u16(svbool_t, uint16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) -svint8x3_t svld3q_vnum_s8(svbool_t, int8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) -svfloat64x3_t svld3q_vnum_f64(svbool_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) -svfloat32x3_t svld3q_vnum_f32(svbool_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) -svfloat16x3_t svld3q_vnum_f16(svbool_t, float16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) -svint32x3_t svld3q_vnum_s32(svbool_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) -svint64x3_t svld3q_vnum_s64(svbool_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) -svint16x3_t svld3q_vnum_s16(svbool_t, int16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) -svbfloat16x3_t svld3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) -svuint8x4_t svld4q_u8(svbool_t, uint8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) -svuint32x4_t svld4q_u32(svbool_t, uint32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) -svuint64x4_t svld4q_u64(svbool_t, uint64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) -svuint16x4_t svld4q_u16(svbool_t, uint16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) -svint8x4_t svld4q_s8(svbool_t, int8_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) -svfloat64x4_t svld4q_f64(svbool_t, float64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) -svfloat32x4_t svld4q_f32(svbool_t, float32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) -svfloat16x4_t svld4q_f16(svbool_t, float16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) -svint32x4_t svld4q_s32(svbool_t, int32_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) -svint64x4_t svld4q_s64(svbool_t, int64_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) -svint16x4_t svld4q_s16(svbool_t, int16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) -svbfloat16x4_t svld4q_bf16(svbool_t, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) -svuint8x4_t svld4q_vnum_u8(svbool_t, uint8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) -svuint32x4_t svld4q_vnum_u32(svbool_t, uint32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) -svuint64x4_t svld4q_vnum_u64(svbool_t, uint64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) -svuint16x4_t svld4q_vnum_u16(svbool_t, uint16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) -svint8x4_t 
svld4q_vnum_s8(svbool_t, int8_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) -svfloat64x4_t svld4q_vnum_f64(svbool_t, float64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) -svfloat32x4_t svld4q_vnum_f32(svbool_t, float32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) -svfloat16x4_t svld4q_vnum_f16(svbool_t, float16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) -svint32x4_t svld4q_vnum_s32(svbool_t, int32_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) -svint64x4_t svld4q_vnum_s64(svbool_t, int64_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) -svint16x4_t svld4q_vnum_s16(svbool_t, int16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) -svbfloat16x4_t svld4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) -float64x2_t svmaxnmqv_f64(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) -float32x4_t svmaxnmqv_f32(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) -float16x8_t svmaxnmqv_f16(svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) -float64x2_t svmaxqv_f64(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) -float32x4_t svmaxqv_f32(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) -float16x8_t svmaxqv_f16(svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) -int8x16_t svmaxqv_s8(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) -int32x4_t svmaxqv_s32(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) -int64x2_t svmaxqv_s64(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) -int16x8_t svmaxqv_s16(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) -uint8x16_t svmaxqv_u8(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) -uint32x4_t svmaxqv_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) -uint64x2_t svmaxqv_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) -uint16x8_t svmaxqv_u16(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) -float64x2_t svminnmqv_f64(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) -float32x4_t svminnmqv_f32(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) -float16x8_t svminnmqv_f16(svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) -float64x2_t svminqv_f64(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) -float32x4_t svminqv_f32(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) -float16x8_t 
svminqv_f16(svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) -int8x16_t svminqv_s8(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) -int32x4_t svminqv_s32(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) -int64x2_t svminqv_s64(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) -int16x8_t svminqv_s16(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) -uint8x16_t svminqv_u8(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) -uint32x4_t svminqv_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) -uint64x2_t svminqv_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) -uint16x8_t svminqv_u16(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) -uint8x16_t svorqv_u8(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) -uint32x4_t svorqv_u32(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) -uint64x2_t svorqv_u64(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) -uint16x8_t svorqv_u16(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) -int8x16_t svorqv_s8(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) -int32x4_t svorqv_s32(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) -int64x2_t svorqv_s64(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) -int16x8_t svorqv_s16(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) -svbool_t svpmov_u8(svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) -svbool_t svpmov_s8(svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) -svbool_t svpmov_u64(svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) -svbool_t svpmov_s64(svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) -svbool_t svpmov_u16(svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) -svbool_t svpmov_s16(svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) -svbool_t svpmov_u32(svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) -svbool_t svpmov_s32(svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) -svbool_t svpmov_lane_u8(svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) -svbool_t svpmov_lane_s8(svint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) -svbool_t svpmov_lane_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) -svbool_t svpmov_lane_s64(svint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) -svbool_t svpmov_lane_u16(svuint16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) -svbool_t 
svpmov_lane_s16(svint16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) -svbool_t svpmov_lane_u32(svuint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) -svbool_t svpmov_lane_s32(svint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) -svuint64_t svpmov_lane_u64_m(svuint64_t, svbool_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) -svint64_t svpmov_lane_s64_m(svint64_t, svbool_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) -svuint16_t svpmov_lane_u16_m(svuint16_t, svbool_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) -svint16_t svpmov_lane_s16_m(svint16_t, svbool_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) -svuint32_t svpmov_lane_u32_m(svuint32_t, svbool_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) -svint32_t svpmov_lane_s32_m(svint32_t, svbool_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8_z))) -svuint8_t svpmov_u8_z(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8_z))) -svint8_t svpmov_s8_z(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64_z))) -svuint64_t svpmov_u64_z(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64_z))) -svint64_t svpmov_s64_z(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16_z))) -svuint16_t svpmov_u16_z(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16_z))) -svint16_t svpmov_s16_z(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32_z))) -svuint32_t svpmov_u32_z(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32_z))) -svint32_t svpmov_s32_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) void svst1dq_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) @@ -9779,6 +11041,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64ba void svst1q_scatter_u64base_s32(svbool_t, svuint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) void svst1q_scatter_u64base_s64(svbool_t, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8))) +void svst1q_scatter_u64base_mf8(svbool_t, svuint64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) void svst1q_scatter_u64base_s16(svbool_t, svuint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) @@ -9823,6 +11087,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64ba void svst1q_scatter_u64base_offset_s32(svbool_t, svuint64_t, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8))) +void svst1q_scatter_u64base_offset_mf8(svbool_t, svuint64_t, int64_t, svmfloat8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) @@ -9887,6 +11153,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64of void svst1q_scatter_s64offset_s32(svbool_t, int32_t *, svint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) void svst1q_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8))) +void svst1q_scatter_s64offset_mf8(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) void svst1q_scatter_s64offset_s16(svbool_t, int16_t *, svint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) @@ -9911,6 +11179,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64of void svst1q_scatter_u64offset_s32(svbool_t, int32_t *, svuint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8))) +void svst1q_scatter_u64offset_mf8(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) @@ -9925,386 +11195,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) void svst1wq_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) void svst1wq_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) -void svst2q_u8(svbool_t, uint8_t const *, svuint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) -void svst2q_u32(svbool_t, uint32_t const *, svuint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) -void svst2q_u64(svbool_t, uint64_t const *, svuint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) -void svst2q_u16(svbool_t, uint16_t const *, svuint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) -void svst2q_s8(svbool_t, int8_t const *, svint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) -void svst2q_f64(svbool_t, float64_t const *, svfloat64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) -void svst2q_f32(svbool_t, float32_t const *, svfloat32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) -void svst2q_f16(svbool_t, float16_t const *, svfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) -void svst2q_s32(svbool_t, int32_t const *, svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) -void svst2q_s64(svbool_t, int64_t const *, svint64x2_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) -void svst2q_s16(svbool_t, int16_t const *, svint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) -void svst2q_bf16(svbool_t, bfloat16_t const *, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) -void svst2q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) -void svst2q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) -void svst2q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) -void svst2q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) -void svst2q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) -void svst2q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) -void svst2q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) -void svst2q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) -void svst2q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) -void svst2q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) -void svst2q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) -void svst2q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) -void svst3q_u8(svbool_t, uint8_t const *, svuint8x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) -void svst3q_u32(svbool_t, uint32_t const *, svuint32x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) -void svst3q_u64(svbool_t, uint64_t const *, svuint64x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) -void svst3q_u16(svbool_t, uint16_t const *, svuint16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) -void svst3q_s8(svbool_t, int8_t const *, svint8x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) -void svst3q_f64(svbool_t, float64_t const *, svfloat64x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) -void svst3q_f32(svbool_t, float32_t const *, svfloat32x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) -void svst3q_f16(svbool_t, float16_t const *, svfloat16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) -void svst3q_s32(svbool_t, int32_t const *, svint32x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) -void svst3q_s64(svbool_t, int64_t const *, svint64x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) -void 
svst3q_s16(svbool_t, int16_t const *, svint16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) -void svst3q_bf16(svbool_t, bfloat16_t const *, svbfloat16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) -void svst3q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) -void svst3q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) -void svst3q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) -void svst3q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) -void svst3q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) -void svst3q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) -void svst3q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) -void svst3q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) -void svst3q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) -void svst3q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) -void svst3q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) -void svst3q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) -void svst4q_u8(svbool_t, uint8_t const *, svuint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) -void svst4q_u32(svbool_t, uint32_t const *, svuint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) -void svst4q_u64(svbool_t, uint64_t const *, svuint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) -void svst4q_u16(svbool_t, uint16_t const *, svuint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) -void svst4q_s8(svbool_t, int8_t const *, svint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) -void svst4q_f64(svbool_t, float64_t const *, svfloat64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) -void svst4q_f32(svbool_t, float32_t const *, svfloat32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) -void svst4q_f16(svbool_t, float16_t const *, svfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) -void svst4q_s32(svbool_t, int32_t const *, svint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) -void svst4q_s64(svbool_t, int64_t const *, svint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) -void svst4q_s16(svbool_t, int16_t const *, svint16x4_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) -void svst4q_bf16(svbool_t, bfloat16_t const *, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) -void svst4q_vnum_u8(svbool_t, uint8_t const *, int64_t, svuint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) -void svst4q_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) -void svst4q_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) -void svst4q_vnum_u16(svbool_t, uint16_t const *, int64_t, svuint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) -void svst4q_vnum_s8(svbool_t, int8_t const *, int64_t, svint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) -void svst4q_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) -void svst4q_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) -void svst4q_vnum_f16(svbool_t, float16_t const *, int64_t, svfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) -void svst4q_vnum_s32(svbool_t, int32_t const *, int64_t, svint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) -void svst4q_vnum_s64(svbool_t, int64_t const *, int64_t, svint64x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) -void svst4q_vnum_s16(svbool_t, int16_t const *, int64_t, svint16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) -void svst4q_vnum_bf16(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) -svuint8_t svtblq_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) -svuint32_t svtblq_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) -svuint64_t svtblq_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) -svuint16_t svtblq_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) -svbfloat16_t svtblq_bf16(svbfloat16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) -svint8_t svtblq_s8(svint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) -svfloat64_t svtblq_f64(svfloat64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) -svfloat32_t svtblq_f32(svfloat32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) -svfloat16_t svtblq_f16(svfloat16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) -svint32_t svtblq_s32(svint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) -svint64_t svtblq_s64(svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_mf8))) -svmfloat8_t svtblq_mf8(svmfloat8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) -svint16_t 
svtblq_s16(svint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) -svuint8_t svtbxq_u8(svuint8_t, svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) -svuint32_t svtbxq_u32(svuint32_t, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) -svuint64_t svtbxq_u64(svuint64_t, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) -svuint16_t svtbxq_u16(svuint16_t, svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) -svbfloat16_t svtbxq_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) -svint8_t svtbxq_s8(svint8_t, svint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) -svfloat64_t svtbxq_f64(svfloat64_t, svfloat64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) -svfloat32_t svtbxq_f32(svfloat32_t, svfloat32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) -svfloat16_t svtbxq_f16(svfloat16_t, svfloat16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) -svint32_t svtbxq_s32(svint32_t, svint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) -svint64_t svtbxq_s64(svint64_t, svint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_mf8))) -svmfloat8_t svtbxq_mf8(svmfloat8_t, svmfloat8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) -svint16_t svtbxq_s16(svint16_t, svint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) -svuint8_t svuzpq1_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) -svuint32_t svuzpq1_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) -svuint64_t svuzpq1_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) -svuint16_t svuzpq1_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) -svbfloat16_t svuzpq1_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) -svint8_t svuzpq1_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) -svfloat64_t svuzpq1_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) -svfloat32_t svuzpq1_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) -svfloat16_t svuzpq1_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) -svint32_t svuzpq1_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) -svint64_t svuzpq1_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_mf8))) -svmfloat8_t svuzpq1_mf8(svmfloat8_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) -svint16_t svuzpq1_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) -svuint8_t svuzpq2_u8(svuint8_t, svuint8_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) -svuint32_t svuzpq2_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) -svuint64_t svuzpq2_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) -svuint16_t svuzpq2_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) -svbfloat16_t svuzpq2_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) -svint8_t svuzpq2_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) -svfloat64_t svuzpq2_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) -svfloat32_t svuzpq2_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) -svfloat16_t svuzpq2_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) -svint32_t svuzpq2_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) -svint64_t svuzpq2_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_mf8))) -svmfloat8_t svuzpq2_mf8(svmfloat8_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) -svint16_t svuzpq2_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) -svuint8_t svzipq1_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) -svuint32_t svzipq1_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) -svuint64_t svzipq1_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) -svuint16_t svzipq1_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) -svbfloat16_t svzipq1_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) -svint8_t svzipq1_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) -svfloat64_t svzipq1_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) -svfloat32_t svzipq1_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) -svfloat16_t svzipq1_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) -svint32_t svzipq1_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) -svint64_t svzipq1_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_mf8))) -svmfloat8_t svzipq1_mf8(svmfloat8_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) -svint16_t svzipq1_s16(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) -svuint8_t svzipq2_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) -svuint32_t svzipq2_u32(svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) -svuint64_t svzipq2_u64(svuint64_t, svuint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) -svuint16_t svzipq2_u16(svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) -svbfloat16_t svzipq2_bf16(svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) -svint8_t svzipq2_s8(svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) -svfloat64_t svzipq2_f64(svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) -svfloat32_t svzipq2_f32(svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) -svfloat16_t svzipq2_f16(svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) -svint32_t svzipq2_s32(svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) -svint64_t svzipq2_s64(svint64_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8))) -svmfloat8_t svzipq2_mf8(svmfloat8_t, svmfloat8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) -svint16_t svzipq2_s16(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) -uint8x16_t svaddqv(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u32))) -uint32x4_t svaddqv(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u64))) -uint64x2_t svaddqv(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u16))) -uint16x8_t svaddqv(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s8))) -int8x16_t svaddqv(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s32))) -int32x4_t svaddqv(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s64))) -int64x2_t svaddqv(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_s16))) -int16x8_t svaddqv(svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f64))) -float64x2_t svaddqv(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f32))) -float32x4_t svaddqv(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_f16))) -float16x8_t svaddqv(svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u8))) -uint8x16_t svandqv(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u32))) -uint32x4_t svandqv(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u64))) -uint64x2_t svandqv(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_u16))) -uint16x8_t svandqv(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s8))) -int8x16_t svandqv(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s32))) -int32x4_t svandqv(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s64))) -int64x2_t svandqv(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svandqv_s16))) -int16x8_t svandqv(svbool_t, svint16_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u8))) -uint8x16_t sveorqv(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u32))) -uint32x4_t sveorqv(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u64))) -uint64x2_t sveorqv(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_u16))) -uint16x8_t sveorqv(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s8))) -int8x16_t sveorqv(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s32))) -int32x4_t sveorqv(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s64))) -int64x2_t sveorqv(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_sveorqv_s16))) -int16x8_t sveorqv(svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u8))) -svuint8_t svextq(svuint8_t, svuint8_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u32))) -svuint32_t svextq(svuint32_t, svuint32_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u64))) -svuint64_t svextq(svuint64_t, svuint64_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_u16))) -svuint16_t svextq(svuint16_t, svuint16_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_bf16))) -svbfloat16_t svextq(svbfloat16_t, svbfloat16_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s8))) -svint8_t svextq(svint8_t, svint8_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f64))) -svfloat64_t svextq(svfloat64_t, svfloat64_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f32))) -svfloat32_t svextq(svfloat32_t, svfloat32_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_f16))) -svfloat16_t svextq(svfloat16_t, svfloat16_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) -svint32_t svextq(svint32_t, svint32_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) -svint64_t svextq(svint64_t, svint64_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_mf8))) -svmfloat8_t svextq(svmfloat8_t, svmfloat8_t, int32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) -svint16_t svextq(svint16_t, svint16_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) svuint32_t svld1q_gather_index_u32(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u64))) @@ -10347,6 +11237,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64ba svint32_t svld1q_gather_offset_s32(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s64))) svint64_t svld1q_gather_offset_s64(svbool_t, svuint64_t, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_mf8))) +svmfloat8_t svld1q_gather_offset_mf8(svbool_t, svuint64_t, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_offset_s16))) svint16_t svld1q_gather_offset_s16(svbool_t, svuint64_t, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_u8))) @@ -10371,6 +11263,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64ba svint32_t svld1q_gather_s32(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s64))) svint64_t svld1q_gather_s64(svbool_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_mf8))) +svmfloat8_t svld1q_gather_mf8(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_s16))) svint16_t svld1q_gather_s16(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64index_u32))) @@ -10415,6 +11309,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64of svint32_t svld1q_gather_offset(svbool_t, int32_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s64))) svint64_t svld1q_gather_offset(svbool_t, int64_t const *, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_mf8))) +svmfloat8_t svld1q_gather_offset(svbool_t, mfloat8_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64offset_s16))) svint16_t svld1q_gather_offset(svbool_t, int16_t const *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1udq_u64))) @@ -10441,266 +11337,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_f32)) svfloat32_t svld1uwq_vnum(svbool_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1uwq_vnum_s32))) svint32_t svld1uwq_vnum(svbool_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u8))) -svuint8x2_t svld2q(svbool_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u32))) -svuint32x2_t svld2q(svbool_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u64))) -svuint64x2_t svld2q(svbool_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_u16))) -svuint16x2_t svld2q(svbool_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s8))) -svint8x2_t svld2q(svbool_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f64))) -svfloat64x2_t svld2q(svbool_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f32))) -svfloat32x2_t svld2q(svbool_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_f16))) -svfloat16x2_t svld2q(svbool_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s32))) -svint32x2_t svld2q(svbool_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s64))) -svint64x2_t svld2q(svbool_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_s16))) -svint16x2_t svld2q(svbool_t, int16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_bf16))) -svbfloat16x2_t svld2q(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u8))) -svuint8x2_t svld2q_vnum(svbool_t, uint8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u32))) 
-svuint32x2_t svld2q_vnum(svbool_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u64))) -svuint64x2_t svld2q_vnum(svbool_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_u16))) -svuint16x2_t svld2q_vnum(svbool_t, uint16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s8))) -svint8x2_t svld2q_vnum(svbool_t, int8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f64))) -svfloat64x2_t svld2q_vnum(svbool_t, float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f32))) -svfloat32x2_t svld2q_vnum(svbool_t, float32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_f16))) -svfloat16x2_t svld2q_vnum(svbool_t, float16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s32))) -svint32x2_t svld2q_vnum(svbool_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s64))) -svint64x2_t svld2q_vnum(svbool_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_s16))) -svint16x2_t svld2q_vnum(svbool_t, int16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2q_vnum_bf16))) -svbfloat16x2_t svld2q_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u8))) -svuint8x3_t svld3q(svbool_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u32))) -svuint32x3_t svld3q(svbool_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u64))) -svuint64x3_t svld3q(svbool_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_u16))) -svuint16x3_t svld3q(svbool_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s8))) -svint8x3_t svld3q(svbool_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f64))) -svfloat64x3_t svld3q(svbool_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f32))) -svfloat32x3_t svld3q(svbool_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_f16))) -svfloat16x3_t svld3q(svbool_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s32))) -svint32x3_t svld3q(svbool_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s64))) -svint64x3_t svld3q(svbool_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_s16))) -svint16x3_t svld3q(svbool_t, int16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_bf16))) -svbfloat16x3_t svld3q(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u8))) -svuint8x3_t svld3q_vnum(svbool_t, uint8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u32))) -svuint32x3_t svld3q_vnum(svbool_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u64))) -svuint64x3_t svld3q_vnum(svbool_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_u16))) -svuint16x3_t 
svld3q_vnum(svbool_t, uint16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s8))) -svint8x3_t svld3q_vnum(svbool_t, int8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f64))) -svfloat64x3_t svld3q_vnum(svbool_t, float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f32))) -svfloat32x3_t svld3q_vnum(svbool_t, float32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_f16))) -svfloat16x3_t svld3q_vnum(svbool_t, float16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s32))) -svint32x3_t svld3q_vnum(svbool_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s64))) -svint64x3_t svld3q_vnum(svbool_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_s16))) -svint16x3_t svld3q_vnum(svbool_t, int16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3q_vnum_bf16))) -svbfloat16x3_t svld3q_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u8))) -svuint8x4_t svld4q(svbool_t, uint8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u32))) -svuint32x4_t svld4q(svbool_t, uint32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u64))) -svuint64x4_t svld4q(svbool_t, uint64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_u16))) -svuint16x4_t svld4q(svbool_t, uint16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s8))) -svint8x4_t svld4q(svbool_t, int8_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f64))) -svfloat64x4_t svld4q(svbool_t, float64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f32))) -svfloat32x4_t svld4q(svbool_t, float32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_f16))) -svfloat16x4_t svld4q(svbool_t, float16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s32))) -svint32x4_t svld4q(svbool_t, int32_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s64))) -svint64x4_t svld4q(svbool_t, int64_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_s16))) -svint16x4_t svld4q(svbool_t, int16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_bf16))) -svbfloat16x4_t svld4q(svbool_t, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u8))) -svuint8x4_t svld4q_vnum(svbool_t, uint8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u32))) -svuint32x4_t svld4q_vnum(svbool_t, uint32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u64))) -svuint64x4_t svld4q_vnum(svbool_t, uint64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_u16))) -svuint16x4_t svld4q_vnum(svbool_t, uint16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s8))) -svint8x4_t svld4q_vnum(svbool_t, int8_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f64))) -svfloat64x4_t svld4q_vnum(svbool_t, 
float64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f32))) -svfloat32x4_t svld4q_vnum(svbool_t, float32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_f16))) -svfloat16x4_t svld4q_vnum(svbool_t, float16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s32))) -svint32x4_t svld4q_vnum(svbool_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s64))) -svint64x4_t svld4q_vnum(svbool_t, int64_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_s16))) -svint16x4_t svld4q_vnum(svbool_t, int16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4q_vnum_bf16))) -svbfloat16x4_t svld4q_vnum(svbool_t, bfloat16_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f64))) -float64x2_t svmaxnmqv(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f32))) -float32x4_t svmaxnmqv(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxnmqv_f16))) -float16x8_t svmaxnmqv(svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f64))) -float64x2_t svmaxqv(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f32))) -float32x4_t svmaxqv(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_f16))) -float16x8_t svmaxqv(svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s8))) -int8x16_t svmaxqv(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s32))) -int32x4_t svmaxqv(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s64))) -int64x2_t svmaxqv(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_s16))) -int16x8_t svmaxqv(svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u8))) -uint8x16_t svmaxqv(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u32))) -uint32x4_t svmaxqv(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u64))) -uint64x2_t svmaxqv(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmaxqv_u16))) -uint16x8_t svmaxqv(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f64))) -float64x2_t svminnmqv(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f32))) -float32x4_t svminnmqv(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnmqv_f16))) -float16x8_t svminnmqv(svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f64))) -float64x2_t svminqv(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f32))) -float32x4_t svminqv(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_f16))) -float16x8_t svminqv(svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s8))) -int8x16_t svminqv(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s32))) -int32x4_t 
svminqv(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s64))) -int64x2_t svminqv(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_s16))) -int16x8_t svminqv(svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u8))) -uint8x16_t svminqv(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u32))) -uint32x4_t svminqv(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u64))) -uint64x2_t svminqv(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminqv_u16))) -uint16x8_t svminqv(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u8))) -uint8x16_t svorqv(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u32))) -uint32x4_t svorqv(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u64))) -uint64x2_t svorqv(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_u16))) -uint16x8_t svorqv(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s8))) -int8x16_t svorqv(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s32))) -int32x4_t svorqv(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s64))) -int64x2_t svorqv(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svorqv_s16))) -int16x8_t svorqv(svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u8))) -svbool_t svpmov(svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s8))) -svbool_t svpmov(svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u64))) -svbool_t svpmov(svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s64))) -svbool_t svpmov(svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u16))) -svbool_t svpmov(svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s16))) -svbool_t svpmov(svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_u32))) -svbool_t svpmov(svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32))) -svbool_t svpmov(svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u8))) -svbool_t svpmov_lane(svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s8))) -svbool_t svpmov_lane(svint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64))) -svbool_t svpmov_lane(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64))) -svbool_t svpmov_lane(svint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16))) -svbool_t svpmov_lane(svuint16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16))) -svbool_t svpmov_lane(svint16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32))) -svbool_t svpmov_lane(svuint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32))) -svbool_t svpmov_lane(svint32_t, uint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u64_m))) -svuint64_t svpmov_lane_m(svuint64_t, svbool_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s64_m))) -svint64_t svpmov_lane_m(svint64_t, svbool_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u16_m))) -svuint16_t svpmov_lane_m(svuint16_t, svbool_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s16_m))) -svint16_t svpmov_lane_m(svint16_t, svbool_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_u32_m))) -svuint32_t svpmov_lane_m(svuint32_t, svbool_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) -svint32_t svpmov_lane_m(svint32_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) void svst1dq(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) @@ -10735,6 +11371,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64b void svst1q_scatter(svbool_t, svuint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s64))) void svst1q_scatter(svbool_t, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_mf8))) +void svst1q_scatter(svbool_t, svuint64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_s16))) void svst1q_scatter(svbool_t, svuint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_index_u32))) @@ -10779,6 +11417,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64b void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s64))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_mf8))) +void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) @@ -10843,6 +11483,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64o void svst1q_scatter_offset(svbool_t, int32_t *, svint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) void svst1q_scatter_offset(svbool_t, int64_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_mf8))) +void svst1q_scatter_offset(svbool_t, mfloat8_t *, svint64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) void svst1q_scatter_offset(svbool_t, int16_t *, svint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) @@ -10867,6 +11509,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64o void svst1q_scatter_offset(svbool_t, int32_t *, svuint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s64))) void 
svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_mf8))) +void svst1q_scatter_offset(svbool_t, mfloat8_t *, svuint64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) @@ -10881,508 +11525,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) void svst1wq_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) void svst1wq_vnum(svbool_t, int32_t *, int64_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) -void svst2q(svbool_t, uint8_t const *, svuint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) -void svst2q(svbool_t, uint32_t const *, svuint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u64))) -void svst2q(svbool_t, uint64_t const *, svuint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u16))) -void svst2q(svbool_t, uint16_t const *, svuint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s8))) -void svst2q(svbool_t, int8_t const *, svint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f64))) -void svst2q(svbool_t, float64_t const *, svfloat64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f32))) -void svst2q(svbool_t, float32_t const *, svfloat32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_f16))) -void svst2q(svbool_t, float16_t const *, svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s32))) -void svst2q(svbool_t, int32_t const *, svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s64))) -void svst2q(svbool_t, int64_t const *, svint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_s16))) -void svst2q(svbool_t, int16_t const *, svint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_bf16))) -void svst2q(svbool_t, bfloat16_t const *, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u8))) -void svst2q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u32))) -void svst2q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u64))) -void svst2q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_u16))) -void svst2q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s8))) -void svst2q_vnum(svbool_t, int8_t const *, int64_t, svint8x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f64))) -void svst2q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f32))) -void svst2q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_f16))) -void svst2q_vnum(svbool_t, float16_t const *, 
int64_t, svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s32))) -void svst2q_vnum(svbool_t, int32_t const *, int64_t, svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s64))) -void svst2q_vnum(svbool_t, int64_t const *, int64_t, svint64x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_s16))) -void svst2q_vnum(svbool_t, int16_t const *, int64_t, svint16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_vnum_bf16))) -void svst2q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u8))) -void svst3q(svbool_t, uint8_t const *, svuint8x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u32))) -void svst3q(svbool_t, uint32_t const *, svuint32x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u64))) -void svst3q(svbool_t, uint64_t const *, svuint64x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_u16))) -void svst3q(svbool_t, uint16_t const *, svuint16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s8))) -void svst3q(svbool_t, int8_t const *, svint8x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f64))) -void svst3q(svbool_t, float64_t const *, svfloat64x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f32))) -void svst3q(svbool_t, float32_t const *, svfloat32x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_f16))) -void svst3q(svbool_t, float16_t const *, svfloat16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s32))) -void svst3q(svbool_t, int32_t const *, svint32x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s64))) -void svst3q(svbool_t, int64_t const *, svint64x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_s16))) -void svst3q(svbool_t, int16_t const *, svint16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_bf16))) -void svst3q(svbool_t, bfloat16_t const *, svbfloat16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u8))) -void svst3q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u32))) -void svst3q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u64))) -void svst3q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_u16))) -void svst3q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s8))) -void svst3q_vnum(svbool_t, int8_t const *, int64_t, svint8x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f64))) -void svst3q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f32))) -void svst3q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_f16))) -void svst3q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s32))) -void svst3q_vnum(svbool_t, int32_t const *, int64_t, 
svint32x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s64))) -void svst3q_vnum(svbool_t, int64_t const *, int64_t, svint64x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_s16))) -void svst3q_vnum(svbool_t, int16_t const *, int64_t, svint16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3q_vnum_bf16))) -void svst3q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x3_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u8))) -void svst4q(svbool_t, uint8_t const *, svuint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u32))) -void svst4q(svbool_t, uint32_t const *, svuint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u64))) -void svst4q(svbool_t, uint64_t const *, svuint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_u16))) -void svst4q(svbool_t, uint16_t const *, svuint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s8))) -void svst4q(svbool_t, int8_t const *, svint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f64))) -void svst4q(svbool_t, float64_t const *, svfloat64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f32))) -void svst4q(svbool_t, float32_t const *, svfloat32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_f16))) -void svst4q(svbool_t, float16_t const *, svfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s32))) -void svst4q(svbool_t, int32_t const *, svint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s64))) -void svst4q(svbool_t, int64_t const *, svint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_s16))) -void svst4q(svbool_t, int16_t const *, svint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_bf16))) -void svst4q(svbool_t, bfloat16_t const *, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u8))) -void svst4q_vnum(svbool_t, uint8_t const *, int64_t, svuint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u32))) -void svst4q_vnum(svbool_t, uint32_t const *, int64_t, svuint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u64))) -void svst4q_vnum(svbool_t, uint64_t const *, int64_t, svuint64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_u16))) -void svst4q_vnum(svbool_t, uint16_t const *, int64_t, svuint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s8))) -void svst4q_vnum(svbool_t, int8_t const *, int64_t, svint8x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f64))) -void svst4q_vnum(svbool_t, float64_t const *, int64_t, svfloat64x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f32))) -void svst4q_vnum(svbool_t, float32_t const *, int64_t, svfloat32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_f16))) -void svst4q_vnum(svbool_t, float16_t const *, int64_t, svfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s32))) -void svst4q_vnum(svbool_t, int32_t const *, int64_t, svint32x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s64))) -void svst4q_vnum(svbool_t, int64_t const *, int64_t, svint64x4_t); 
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_s16))) -void svst4q_vnum(svbool_t, int16_t const *, int64_t, svint16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4q_vnum_bf16))) -void svst4q_vnum(svbool_t, bfloat16_t const *, int64_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u8))) -svuint8_t svtblq(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u32))) -svuint32_t svtblq(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u64))) -svuint64_t svtblq(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_u16))) -svuint16_t svtblq(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_bf16))) -svbfloat16_t svtblq(svbfloat16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s8))) -svint8_t svtblq(svint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f64))) -svfloat64_t svtblq(svfloat64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f32))) -svfloat32_t svtblq(svfloat32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_f16))) -svfloat16_t svtblq(svfloat16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) -svint32_t svtblq(svint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) -svint64_t svtblq(svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_mf8))) -svmfloat8_t svtblq(svmfloat8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) -svint16_t svtblq(svint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) -svuint8_t svtbxq(svuint8_t, svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u32))) -svuint32_t svtbxq(svuint32_t, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u64))) -svuint64_t svtbxq(svuint64_t, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u16))) -svuint16_t svtbxq(svuint16_t, svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_bf16))) -svbfloat16_t svtbxq(svbfloat16_t, svbfloat16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s8))) -svint8_t svtbxq(svint8_t, svint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f64))) -svfloat64_t svtbxq(svfloat64_t, svfloat64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f32))) -svfloat32_t svtbxq(svfloat32_t, svfloat32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_f16))) -svfloat16_t svtbxq(svfloat16_t, svfloat16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) -svint32_t svtbxq(svint32_t, svint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) -svint64_t svtbxq(svint64_t, svint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_mf8))) -svmfloat8_t svtbxq(svmfloat8_t, svmfloat8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) -svint16_t svtbxq(svint16_t, 
svint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) -svuint8_t svuzpq1(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u32))) -svuint32_t svuzpq1(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u64))) -svuint64_t svuzpq1(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u16))) -svuint16_t svuzpq1(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_bf16))) -svbfloat16_t svuzpq1(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s8))) -svint8_t svuzpq1(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f64))) -svfloat64_t svuzpq1(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f32))) -svfloat32_t svuzpq1(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_f16))) -svfloat16_t svuzpq1(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) -svint32_t svuzpq1(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) -svint64_t svuzpq1(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_mf8))) -svmfloat8_t svuzpq1(svmfloat8_t, svmfloat8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) -svint16_t svuzpq1(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) -svuint8_t svuzpq2(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u32))) -svuint32_t svuzpq2(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u64))) -svuint64_t svuzpq2(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u16))) -svuint16_t svuzpq2(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_bf16))) -svbfloat16_t svuzpq2(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s8))) -svint8_t svuzpq2(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f64))) -svfloat64_t svuzpq2(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f32))) -svfloat32_t svuzpq2(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_f16))) -svfloat16_t svuzpq2(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) -svint32_t svuzpq2(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) -svint64_t svuzpq2(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_mf8))) -svmfloat8_t svuzpq2(svmfloat8_t, svmfloat8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) -svint16_t svuzpq2(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) -svuint8_t svzipq1(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u32))) -svuint32_t svzipq1(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u64))) -svuint64_t svzipq1(svuint64_t, 
svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u16))) -svuint16_t svzipq1(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_bf16))) -svbfloat16_t svzipq1(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s8))) -svint8_t svzipq1(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f64))) -svfloat64_t svzipq1(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f32))) -svfloat32_t svzipq1(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_f16))) -svfloat16_t svzipq1(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) -svint32_t svzipq1(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) -svint64_t svzipq1(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_mf8))) -svmfloat8_t svzipq1(svmfloat8_t, svmfloat8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) -svint16_t svzipq1(svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) -svuint8_t svzipq2(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u32))) -svuint32_t svzipq2(svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u64))) -svuint64_t svzipq2(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u16))) -svuint16_t svzipq2(svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_bf16))) -svbfloat16_t svzipq2(svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s8))) -svint8_t svzipq2(svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f64))) -svfloat64_t svzipq2(svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f32))) -svfloat32_t svzipq2(svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_f16))) -svfloat16_t svzipq2(svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) -svint32_t svzipq2(svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) -svint64_t svzipq2(svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8))) -svmfloat8_t svzipq2(svmfloat8_t, svmfloat8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) -svint16_t svzipq2(svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_bf16))) -svbfloat16_t svdup_laneq_bf16(svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_bf16))) -svbfloat16_t svdup_laneq(svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) -svint8_t svclamp_s8(svint8_t, svint8_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) -svint32_t svclamp_s32(svint32_t, svint32_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) -svint64_t svclamp_s64(svint64_t, svint64_t, svint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) -svint16_t svclamp_s16(svint16_t, svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) -svuint8_t svclamp_u8(svuint8_t, svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) -svuint32_t svclamp_u32(svuint32_t, svuint32_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) -svuint64_t svclamp_u64(svuint64_t, svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) -svuint16_t svclamp_u16(svuint16_t, svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b16))) -svbool_t svpsel_lane_b16(svbool_t, svbool_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b32))) -svbool_t svpsel_lane_b32(svbool_t, svbool_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b64))) -svbool_t svpsel_lane_b64(svbool_t, svbool_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpsel_lane_b8))) -svbool_t svpsel_lane_b8(svbool_t, svbool_t, uint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) -svuint8_t svrevd_u8_m(svuint8_t, svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) -svuint32_t svrevd_u32_m(svuint32_t, svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) -svuint64_t svrevd_u64_m(svuint64_t, svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) -svuint16_t svrevd_u16_m(svuint16_t, svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) -svbfloat16_t svrevd_bf16_m(svbfloat16_t, svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) -svint8_t svrevd_s8_m(svint8_t, svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) -svfloat64_t svrevd_f64_m(svfloat64_t, svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) -svfloat32_t svrevd_f32_m(svfloat32_t, svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) -svfloat16_t svrevd_f16_m(svfloat16_t, svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) -svint32_t svrevd_s32_m(svint32_t, svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) -svint64_t svrevd_s64_m(svint64_t, svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) -svint16_t svrevd_s16_m(svint16_t, svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) -svuint8_t svrevd_u8_x(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) -svuint32_t svrevd_u32_x(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) -svuint64_t svrevd_u64_x(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) -svuint16_t svrevd_u16_x(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) -svbfloat16_t svrevd_bf16_x(svbool_t, svbfloat16_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) -svint8_t svrevd_s8_x(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) -svfloat64_t svrevd_f64_x(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) -svfloat32_t svrevd_f32_x(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) -svfloat16_t svrevd_f16_x(svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) -svint32_t svrevd_s32_x(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) -svint64_t svrevd_s64_x(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) -svint16_t svrevd_s16_x(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) -svuint8_t svrevd_u8_z(svbool_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) -svuint32_t svrevd_u32_z(svbool_t, svuint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) -svuint64_t svrevd_u64_z(svbool_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) -svuint16_t svrevd_u16_z(svbool_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) -svbfloat16_t svrevd_bf16_z(svbool_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) -svint8_t svrevd_s8_z(svbool_t, svint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) -svfloat64_t svrevd_f64_z(svbool_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) -svfloat32_t svrevd_f32_z(svbool_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) -svfloat16_t svrevd_f16_z(svbool_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) -svint32_t svrevd_s32_z(svbool_t, svint32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) -svint64_t svrevd_s64_z(svbool_t, svint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) -svint16_t svrevd_s16_z(svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s8))) -svint8_t svclamp(svint8_t, svint8_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s32))) -svint32_t svclamp(svint32_t, svint32_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s64))) -svint64_t svclamp(svint64_t, svint64_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_s16))) -svint16_t svclamp(svint16_t, svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u8))) -svuint8_t svclamp(svuint8_t, svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u32))) -svuint32_t svclamp(svuint32_t, svuint32_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u64))) -svuint64_t svclamp(svuint64_t, svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_u16))) -svuint16_t svclamp(svuint16_t, svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_m))) -svuint8_t svrevd_m(svuint8_t, svbool_t, svuint8_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_m))) -svuint32_t svrevd_m(svuint32_t, svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_m))) -svuint64_t svrevd_m(svuint64_t, svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_m))) -svuint16_t svrevd_m(svuint16_t, svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_m))) -svbfloat16_t svrevd_m(svbfloat16_t, svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_m))) -svint8_t svrevd_m(svint8_t, svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_m))) -svfloat64_t svrevd_m(svfloat64_t, svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_m))) -svfloat32_t svrevd_m(svfloat32_t, svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_m))) -svfloat16_t svrevd_m(svfloat16_t, svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_m))) -svint32_t svrevd_m(svint32_t, svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_m))) -svint64_t svrevd_m(svint64_t, svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_m))) -svint16_t svrevd_m(svint16_t, svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_x))) -svuint8_t svrevd_x(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_x))) -svuint32_t svrevd_x(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_x))) -svuint64_t svrevd_x(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_x))) -svuint16_t svrevd_x(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_x))) -svbfloat16_t svrevd_x(svbool_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_x))) -svint8_t svrevd_x(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_x))) -svfloat64_t svrevd_x(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_x))) -svfloat32_t svrevd_x(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_x))) -svfloat16_t svrevd_x(svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_x))) -svint32_t svrevd_x(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_x))) -svint64_t svrevd_x(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_x))) -svint16_t svrevd_x(svbool_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u8_z))) -svuint8_t svrevd_z(svbool_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u32_z))) -svuint32_t svrevd_z(svbool_t, svuint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u64_z))) -svuint64_t svrevd_z(svbool_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_u16_z))) -svuint16_t svrevd_z(svbool_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_bf16_z))) -svbfloat16_t svrevd_z(svbool_t, svbfloat16_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s8_z))) -svint8_t svrevd_z(svbool_t, svint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f64_z))) -svfloat64_t svrevd_z(svbool_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f32_z))) -svfloat32_t svrevd_z(svbool_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_f16_z))) -svfloat16_t svrevd_z(svbool_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s32_z))) -svint32_t svrevd_z(svbool_t, svint32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s64_z))) -svint64_t svrevd_z(svbool_t, svint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrevd_s16_z))) -svint16_t svrevd_z(svbool_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) -svfloat32_t svbfmlslb_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) -svfloat32_t svbfmlslb_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) -svfloat32_t svbfmlslt_f32(svfloat32_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) -svfloat32_t svbfmlslt_lane_f32(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) -svfloat64_t svclamp_f64(svfloat64_t, svfloat64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) -svfloat32_t svclamp_f32(svfloat32_t, svfloat32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) -svfloat16_t svclamp_f16(svfloat16_t, svfloat16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c8))) uint64_t svcntp_c8(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c32))) @@ -11391,30 +11533,12 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c64))) uint64_t svcntp_c64(svcount_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcntp_c16))) uint64_t svcntp_c16(svcount_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) -svboolx2_t svcreate2_b(svbool_t, svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) -svboolx4_t svcreate4_b(svbool_t, svbool_t, svbool_t, svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) -svfloat32_t svdot_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) -svint32_t svdot_s32_s16(svint32_t, svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) -svuint32_t svdot_u32_u16(svuint32_t, svuint16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) -svfloat32_t svdot_lane_f32_f16(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) -svint32_t svdot_lane_s32_s16(svint32_t, svint16_t, svint16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) -svuint32_t svdot_lane_u32_u16(svuint32_t, svuint16_t, svuint16_t, uint64_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) -svbool_t svget2_b(svboolx2_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) -svbool_t svget4_b(svboolx4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) svuint8x2_t svld1_u8_x2(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) svint8x2_t svld1_s8_x2(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2))) +svmfloat8x2_t svld1_mf8_x2(svcount_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) svuint64x2_t svld1_u64_x2(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) @@ -11439,6 +11563,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) svuint8x4_t svld1_u8_x4(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) svint8x4_t svld1_s8_x4(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4))) +svmfloat8x4_t svld1_mf8_x4(svcount_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) svuint64x4_t svld1_u64_x4(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) @@ -11463,6 +11589,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) svuint8x2_t svld1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) svint8x2_t svld1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2))) +svmfloat8x2_t svld1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) svuint64x2_t svld1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) @@ -11487,6 +11615,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) svuint8x4_t svld1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) svint8x4_t svld1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4))) +svmfloat8x4_t svld1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) svuint64x4_t svld1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) @@ -11511,6 +11641,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) svuint8x2_t svldnt1_u8_x2(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) svint8x2_t svldnt1_s8_x2(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2))) +svmfloat8x2_t svldnt1_mf8_x2(svcount_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) svuint64x2_t svldnt1_u64_x2(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) @@ -11535,6 +11667,8 @@ __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) svuint8x4_t svldnt1_u8_x4(svcount_t, uint8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) svint8x4_t svldnt1_s8_x4(svcount_t, int8_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4))) +svmfloat8x4_t svldnt1_mf8_x4(svcount_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) svuint64x4_t svldnt1_u64_x4(svcount_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) @@ -11559,6 +11693,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2)) svuint8x2_t svldnt1_vnum_u8_x2(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) svint8x2_t svldnt1_vnum_s8_x2(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2))) +svmfloat8x2_t svldnt1_vnum_mf8_x2(svcount_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) svuint64x2_t svldnt1_vnum_u64_x2(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) @@ -11583,6 +11719,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4)) svuint8x4_t svldnt1_vnum_u8_x4(svcount_t, uint8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) svint8x4_t svldnt1_vnum_s8_x4(svcount_t, int8_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4))) +svmfloat8x4_t svldnt1_vnum_mf8_x4(svcount_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) svuint64x4_t svldnt1_vnum_u64_x4(svcount_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) @@ -11637,30 +11775,16 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c64))) svcount_t svptrue_c64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svptrue_c16))) svcount_t svptrue_c16(void); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) -svint16_t svqcvtn_s16_s32_x2(svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) -svuint16_t svqcvtn_u16_s32_x2(svint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) -svuint16_t svqcvtn_u16_u32_x2(svuint32x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) -svint16_t svqrshrn_n_s16_s32_x2(svint32x2_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) -svuint16_t svqrshrn_n_u16_u32_x2(svuint32x2_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) -svuint16_t svqrshrun_n_u16_s32_x2(svint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) svbool_t svreinterpret_b(svcount_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) svcount_t svreinterpret_c(svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) -svboolx2_t svset2_b(svboolx2_t, uint64_t, svbool_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) -svboolx4_t 
svset4_b(svboolx4_t, uint64_t, svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) void svst1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) void svst1_s8_x2(svcount_t, int8_t *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2))) +void svst1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) void svst1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) @@ -11685,6 +11809,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) void svst1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) void svst1_s8_x4(svcount_t, int8_t *, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4))) +void svst1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) void svst1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) @@ -11709,6 +11835,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) void svst1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) void svst1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2))) +void svst1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) void svst1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) @@ -11733,6 +11861,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) void svst1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) void svst1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4))) +void svst1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) void svst1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) @@ -11757,6 +11887,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) void svstnt1_u8_x2(svcount_t, uint8_t *, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) void svstnt1_s8_x2(svcount_t, int8_t *, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2))) +void svstnt1_mf8_x2(svcount_t, mfloat8_t *, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) void svstnt1_u64_x2(svcount_t, uint64_t *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) @@ -11781,6 +11913,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) void svstnt1_u8_x4(svcount_t, uint8_t *, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) void 
svstnt1_s8_x4(svcount_t, int8_t *, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4))) +void svstnt1_mf8_x4(svcount_t, mfloat8_t *, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) void svstnt1_u64_x4(svcount_t, uint64_t *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) @@ -11805,6 +11939,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2)) void svstnt1_vnum_u8_x2(svcount_t, uint8_t *, int64_t, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) void svstnt1_vnum_s8_x2(svcount_t, int8_t *, int64_t, svint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2))) +void svstnt1_vnum_mf8_x2(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) void svstnt1_vnum_u64_x2(svcount_t, uint64_t *, int64_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) @@ -11829,6 +11965,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4)) void svstnt1_vnum_u8_x4(svcount_t, uint8_t *, int64_t, svuint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) void svstnt1_vnum_s8_x4(svcount_t, int8_t *, int64_t, svint8x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4))) +void svstnt1_vnum_mf8_x4(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) void svstnt1_vnum_u64_x4(svcount_t, uint64_t *, int64_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) @@ -11849,10 +11987,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f32_x4) void svstnt1_vnum_f32_x4(svcount_t, float32_t *, int64_t, svfloat32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32_x4))) void svstnt1_vnum_s32_x4(svcount_t, int32_t *, int64_t, svint32x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_b))) -svboolx2_t svundef2_b(); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_b))) -svboolx4_t svundef4_b(); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c8_s64))) svcount_t svwhilege_c8_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c32_s64))) @@ -11869,22 +12003,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64))) svcount_t svwhilege_c64_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) svcount_t svwhilege_c16_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) -svboolx2_t svwhilege_b8_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) -svboolx2_t svwhilege_b32_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) -svboolx2_t svwhilege_b64_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) -svboolx2_t svwhilege_b16_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) -svboolx2_t 
svwhilege_b8_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) -svboolx2_t svwhilege_b32_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) -svboolx2_t svwhilege_b64_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) -svboolx2_t svwhilege_b16_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) svcount_t svwhilegt_c8_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) @@ -11901,22 +12019,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64))) svcount_t svwhilegt_c64_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) svcount_t svwhilegt_c16_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) -svboolx2_t svwhilegt_b8_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) -svboolx2_t svwhilegt_b32_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) -svboolx2_t svwhilegt_b64_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) -svboolx2_t svwhilegt_b16_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) -svboolx2_t svwhilegt_b8_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) -svboolx2_t svwhilegt_b32_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) -svboolx2_t svwhilegt_b64_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) -svboolx2_t svwhilegt_b16_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) svcount_t svwhilele_c8_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) @@ -11933,22 +12035,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64))) svcount_t svwhilele_c64_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) svcount_t svwhilele_c16_u64(uint64_t, uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) -svboolx2_t svwhilele_b8_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) -svboolx2_t svwhilele_b32_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) -svboolx2_t svwhilele_b64_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) -svboolx2_t svwhilele_b16_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) -svboolx2_t svwhilele_b8_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) -svboolx2_t svwhilele_b32_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) -svboolx2_t svwhilele_b64_u64_x2(uint64_t, 
uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) -svboolx2_t svwhilele_b16_u64_x2(uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) svcount_t svwhilelt_c8_u64(uint64_t, uint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) @@ -11965,60 +12051,12 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64))) svcount_t svwhilelt_c64_s64(int64_t, int64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) svcount_t svwhilelt_c16_s64(int64_t, int64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) -svboolx2_t svwhilelt_b8_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) -svboolx2_t svwhilelt_b32_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) -svboolx2_t svwhilelt_b64_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) -svboolx2_t svwhilelt_b16_u64_x2(uint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) -svboolx2_t svwhilelt_b8_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) -svboolx2_t svwhilelt_b32_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) -svboolx2_t svwhilelt_b64_s64_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) -svboolx2_t svwhilelt_b16_s64_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_f32))) -svfloat32_t svbfmlslb(svfloat32_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslb_lane_f32))) -svfloat32_t svbfmlslb_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_f32))) -svfloat32_t svbfmlslt(svfloat32_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbfmlslt_lane_f32))) -svfloat32_t svbfmlslt_lane(svfloat32_t, svbfloat16_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f64))) -svfloat64_t svclamp(svfloat64_t, svfloat64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f32))) -svfloat32_t svclamp(svfloat32_t, svfloat32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_f16))) -svfloat16_t svclamp(svfloat16_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_b))) -svboolx2_t svcreate2(svbool_t, svbool_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_b))) -svboolx4_t svcreate4(svbool_t, svbool_t, svbool_t, svbool_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_f16))) -svfloat32_t svdot(svfloat32_t, svfloat16_t, svfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_s32_s16))) -svint32_t svdot(svint32_t, svint16_t, svint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_u32_u16))) -svuint32_t svdot(svuint32_t, svuint16_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_f16))) 
-svfloat32_t svdot_lane(svfloat32_t, svfloat16_t, svfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_s32_s16))) -svint32_t svdot_lane(svint32_t, svint16_t, svint16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_u32_u16))) -svuint32_t svdot_lane(svuint32_t, svuint16_t, svuint16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_b))) -svbool_t svget2(svboolx2_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_b))) -svbool_t svget4(svboolx4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x2))) svuint8x2_t svld1_x2(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x2))) svint8x2_t svld1_x2(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x2))) +svmfloat8x2_t svld1_x2(svcount_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x2))) svuint64x2_t svld1_x2(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x2))) @@ -12043,6 +12081,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u8_x4))) svuint8x4_t svld1_x4(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8_x4))) svint8x4_t svld1_x4(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8_x4))) +svmfloat8x4_t svld1_x4(svcount_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64_x4))) svuint64x4_t svld1_x4(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64_x4))) @@ -12067,6 +12107,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x2))) svuint8x2_t svld1_vnum_x2(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x2))) svint8x2_t svld1_vnum_x2(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x2))) +svmfloat8x2_t svld1_vnum_x2(svcount_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x2))) svuint64x2_t svld1_vnum_x2(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x2))) @@ -12091,6 +12133,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8_x4))) svuint8x4_t svld1_vnum_x4(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8_x4))) svint8x4_t svld1_vnum_x4(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8_x4))) +svmfloat8x4_t svld1_vnum_x4(svcount_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64_x4))) svuint64x4_t svld1_vnum_x4(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64_x4))) @@ -12115,6 +12159,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x2))) svuint8x2_t svldnt1_x2(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x2))) svint8x2_t svldnt1_x2(svcount_t, int8_t const *); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x2))) +svmfloat8x2_t svldnt1_x2(svcount_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x2))) svuint64x2_t svldnt1_x2(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x2))) @@ -12139,6 +12185,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8_x4))) svuint8x4_t svldnt1_x4(svcount_t, uint8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8_x4))) svint8x4_t svldnt1_x4(svcount_t, int8_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8_x4))) +svmfloat8x4_t svldnt1_x4(svcount_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64_x4))) svuint64x4_t svldnt1_x4(svcount_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64_x4))) @@ -12163,6 +12211,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x2) svuint8x2_t svldnt1_vnum_x2(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x2))) svint8x2_t svldnt1_vnum_x2(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x2))) +svmfloat8x2_t svldnt1_vnum_x2(svcount_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x2))) svuint64x2_t svldnt1_vnum_x2(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x2))) @@ -12187,6 +12237,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8_x4) svuint8x4_t svldnt1_vnum_x4(svcount_t, uint8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8_x4))) svint8x4_t svldnt1_vnum_x4(svcount_t, int8_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8_x4))) +svmfloat8x4_t svldnt1_vnum_x4(svcount_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64_x4))) svuint64x4_t svldnt1_vnum_x4(svcount_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64_x4))) @@ -12207,30 +12259,16 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f32_x4 svfloat32x4_t svldnt1_vnum_x4(svcount_t, float32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32_x4))) svint32x4_t svldnt1_vnum_x4(svcount_t, int32_t const *, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_s16_s32_x2))) -svint16_t svqcvtn_s16(svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_s32_x2))) -svuint16_t svqcvtn_u16(svint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqcvtn_u16_u32_x2))) -svuint16_t svqcvtn_u16(svuint32x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_s16_s32_x2))) -svint16_t svqrshrn_s16(svint32x2_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrn_n_u16_u32_x2))) -svuint16_t svqrshrn_u16(svuint32x2_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqrshrun_n_u16_s32_x2))) -svuint16_t svqrshrun_u16(svint32x2_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_b))) svbool_t svreinterpret(svcount_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svreinterpret_c))) svcount_t svreinterpret(svbool_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_b))) -svboolx2_t svset2(svboolx2_t, uint64_t, svbool_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_b))) -svboolx4_t svset4(svboolx4_t, uint64_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x2))) void svst1(svcount_t, uint8_t *, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x2))) void svst1(svcount_t, int8_t *, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x2))) +void svst1(svcount_t, mfloat8_t *, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x2))) void svst1(svcount_t, uint64_t *, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x2))) @@ -12255,6 +12293,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u8_x4))) void svst1(svcount_t, uint8_t *, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8_x4))) void svst1(svcount_t, int8_t *, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8_x4))) +void svst1(svcount_t, mfloat8_t *, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64_x4))) void svst1(svcount_t, uint64_t *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64_x4))) @@ -12279,6 +12319,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x2))) void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x2))) void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x2))) +void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x2))) void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x2))) @@ -12303,6 +12345,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8_x4))) void svst1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8_x4))) void svst1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8_x4))) +void svst1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64_x4))) void svst1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64_x4))) @@ -12327,6 +12371,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x2))) void svstnt1(svcount_t, uint8_t *, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x2))) void svstnt1(svcount_t, int8_t *, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x2))) +void svstnt1(svcount_t, mfloat8_t *, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x2))) void svstnt1(svcount_t, uint64_t *, 
svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x2))) @@ -12351,6 +12397,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8_x4))) void svstnt1(svcount_t, uint8_t *, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8_x4))) void svstnt1(svcount_t, int8_t *, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8_x4))) +void svstnt1(svcount_t, mfloat8_t *, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64_x4))) void svstnt1(svcount_t, uint64_t *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64_x4))) @@ -12375,6 +12423,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x2) void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x2))) void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x2))) +void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x2))) void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x2))) @@ -12399,6 +12449,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8_x4) void svstnt1_vnum(svcount_t, uint8_t *, int64_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8_x4))) void svstnt1_vnum(svcount_t, int8_t *, int64_t, svint8x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8_x4))) +void svstnt1_vnum(svcount_t, mfloat8_t *, int64_t, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64_x4))) void svstnt1_vnum(svcount_t, uint64_t *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64_x4))) @@ -12435,22 +12487,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c64_u64)) svcount_t svwhilege_c64(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_c16_u64))) svcount_t svwhilege_c16(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_s64_x2))) -svboolx2_t svwhilege_b8_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_s64_x2))) -svboolx2_t svwhilege_b32_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_s64_x2))) -svboolx2_t svwhilege_b64_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_s64_x2))) -svboolx2_t svwhilege_b16_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b8_u64_x2))) -svboolx2_t svwhilege_b8_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b32_u64_x2))) -svboolx2_t svwhilege_b32_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b64_u64_x2))) -svboolx2_t svwhilege_b64_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilege_b16_u64_x2))) -svboolx2_t svwhilege_b16_x2(uint64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c8_s64))) svcount_t svwhilegt_c8(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c32_s64))) @@ -12467,22 +12503,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c64_u64)) svcount_t svwhilegt_c64(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_c16_u64))) svcount_t svwhilegt_c16(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_s64_x2))) -svboolx2_t svwhilegt_b8_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_s64_x2))) -svboolx2_t svwhilegt_b32_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_s64_x2))) -svboolx2_t svwhilegt_b64_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_s64_x2))) -svboolx2_t svwhilegt_b16_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b8_u64_x2))) -svboolx2_t svwhilegt_b8_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b32_u64_x2))) -svboolx2_t svwhilegt_b32_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b64_u64_x2))) -svboolx2_t svwhilegt_b64_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilegt_b16_u64_x2))) -svboolx2_t svwhilegt_b16_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c8_s64))) svcount_t svwhilele_c8(int64_t, int64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c32_s64))) @@ -12499,22 +12519,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c64_u64)) svcount_t svwhilele_c64(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_c16_u64))) svcount_t svwhilele_c16(uint64_t, uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_s64_x2))) -svboolx2_t svwhilele_b8_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_s64_x2))) -svboolx2_t svwhilele_b32_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_s64_x2))) -svboolx2_t svwhilele_b64_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_s64_x2))) -svboolx2_t svwhilele_b16_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b8_u64_x2))) -svboolx2_t svwhilele_b8_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b32_u64_x2))) -svboolx2_t svwhilele_b32_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b64_u64_x2))) -svboolx2_t svwhilele_b64_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilele_b16_u64_x2))) -svboolx2_t svwhilele_b16_x2(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c8_u64))) svcount_t svwhilelt_c8(uint64_t, uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c32_u64))) @@ -12531,70 +12535,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c64_s64)) svcount_t svwhilelt_c64(int64_t, int64_t, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_c16_s64))) svcount_t svwhilelt_c16(int64_t, int64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_u64_x2))) -svboolx2_t svwhilelt_b8_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_u64_x2))) -svboolx2_t svwhilelt_b32_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_u64_x2))) -svboolx2_t svwhilelt_b64_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_u64_x2))) -svboolx2_t svwhilelt_b16_x2(uint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b8_s64_x2))) -svboolx2_t svwhilelt_b8_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b32_s64_x2))) -svboolx2_t svwhilelt_b32_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b64_s64_x2))) -svboolx2_t svwhilelt_b64_x2(int64_t, int64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilelt_b16_s64_x2))) -svboolx2_t svwhilelt_b16_x2(int64_t, int64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) -svuint8_t svdup_laneq_u8(svuint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) -svint8_t svdup_laneq_s8(svint8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_mf8))) -svmfloat8_t svdup_laneq_mf8(svmfloat8_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) -svuint64_t svdup_laneq_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) -svfloat64_t svdup_laneq_f64(svfloat64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s64))) -svint64_t svdup_laneq_s64(svint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u16))) -svuint16_t svdup_laneq_u16(svuint16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f16))) -svfloat16_t svdup_laneq_f16(svfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s16))) -svint16_t svdup_laneq_s16(svint16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u32))) -svuint32_t svdup_laneq_u32(svuint32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f32))) -svfloat32_t svdup_laneq_f32(svfloat32_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s32))) -svint32_t svdup_laneq_s32(svint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) -svuint8_t svdup_laneq(svuint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) -svint8_t svdup_laneq(svint8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_mf8))) -svmfloat8_t svdup_laneq(svmfloat8_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) -svuint64_t svdup_laneq(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) -svfloat64_t svdup_laneq(svfloat64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s64))) -svint64_t svdup_laneq(svint64_t, uint64_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u16))) -svuint16_t svdup_laneq(svuint16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f16))) -svfloat16_t svdup_laneq(svfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s16))) -svint16_t svdup_laneq(svint16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u32))) -svuint32_t svdup_laneq(svuint32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f32))) -svfloat32_t svdup_laneq(svfloat32_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s32))) -svint32_t svdup_laneq(svint32_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s8))) svint8_t svaba_n_s8(svint8_t, svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaba_n_s32))) @@ -15641,6 +15581,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u64))) svuint64_t svtbl2_u64(svuint64x2_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u16))) svuint16_t svtbl2_u16(svuint16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) +svbfloat16_t svtbl2_bf16(svbfloat16x2_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s8))) svint8_t svtbl2_s8(svint8x2_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f64))) @@ -15663,6 +15605,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u64))) svuint64_t svtbx_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u16))) svuint16_t svtbx_u16(svuint16_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) +svbfloat16_t svtbx_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s8))) svint8_t svtbx_s8(svint8_t, svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f64))) @@ -15801,6 +15745,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s64))) svbool_t svwhilerw_s64(int64_t const *, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u16))) svbool_t svwhilerw_u16(uint16_t const *, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) +svbool_t svwhilerw_bf16(bfloat16_t const *, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f16))) svbool_t svwhilerw_f16(float16_t const *, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s16))) @@ -15823,6 +15769,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s64))) svbool_t svwhilewr_s64(int64_t const *, int64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u16))) svbool_t svwhilewr_u16(uint16_t const *, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) +svbool_t svwhilewr_bf16(bfloat16_t const *, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f16))) svbool_t svwhilewr_f16(float16_t const *, float16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s16))) @@ -18895,6 +18843,8 @@ __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u64))) svuint64_t svtbl2(svuint64x2_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_u16))) svuint16_t svtbl2(svuint16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) +svbfloat16_t svtbl2(svbfloat16x2_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_s8))) svint8_t svtbl2(svint8x2_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_f64))) @@ -18917,6 +18867,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u64))) svuint64_t svtbx(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_u16))) svuint16_t svtbx(svuint16_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) +svbfloat16_t svtbx(svbfloat16_t, svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_s8))) svint8_t svtbx(svint8_t, svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_f64))) @@ -19055,6 +19007,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s64))) svbool_t svwhilerw(int64_t const *, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_u16))) svbool_t svwhilerw(uint16_t const *, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) +svbool_t svwhilerw(bfloat16_t const *, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_f16))) svbool_t svwhilerw(float16_t const *, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_s16))) @@ -19077,6 +19031,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s64))) svbool_t svwhilewr(int64_t const *, int64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_u16))) svbool_t svwhilewr(uint16_t const *, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) +svbool_t svwhilewr(bfloat16_t const *, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_f16))) svbool_t svwhilewr(float16_t const *, float16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_s16))) @@ -19839,6 +19795,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u64))) uint64_t svclasta_n_u64(svbool_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u16))) uint16_t svclasta_n_u16(svbool_t, uint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) +bfloat16_t svclasta_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s8))) int8_t svclasta_n_s8(svbool_t, int8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f64))) @@ -19861,6 +19819,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u64))) svuint64_t svclasta_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u16))) svuint16_t svclasta_u16(svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) +svbfloat16_t svclasta_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s8))) svint8_t svclasta_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f64))) @@ -19883,6 +19843,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u64))) uint64_t svclastb_n_u64(svbool_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u16))) uint16_t svclastb_n_u16(svbool_t, uint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) +bfloat16_t svclastb_n_bf16(svbool_t, bfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s8))) int8_t svclastb_n_s8(svbool_t, int8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f64))) @@ -19905,6 +19867,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u64))) svuint64_t svclastb_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u16))) svuint16_t svclastb_u16(svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) +svbfloat16_t svclastb_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s8))) svint8_t svclastb_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f64))) @@ -20465,6 +20429,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_m))) svuint64_t svcnt_u64_m(svuint64_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_m))) svuint16_t svcnt_u16_m(svuint16_t, svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) +svuint16_t svcnt_bf16_m(svuint16_t, svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_m))) svuint8_t svcnt_s8_m(svuint8_t, svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_m))) @@ -20487,6 +20453,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_x))) svuint64_t svcnt_u64_x(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_x))) svuint16_t svcnt_u16_x(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) +svuint16_t svcnt_bf16_x(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_x))) svuint8_t svcnt_s8_x(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_x))) @@ -20509,6 +20477,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_z))) svuint64_t svcnt_u64_z(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_z))) svuint16_t svcnt_u16_z(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) +svuint16_t svcnt_bf16_z(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_z))) svuint8_t svcnt_s8_z(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_z))) @@ -20555,6 +20525,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u64))) svuint64x2_t svcreate2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u16))) svuint16x2_t svcreate2_u16(svuint16_t, 
svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) +svbfloat16x2_t svcreate2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s8))) svint8x2_t svcreate2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f64))) @@ -20567,6 +20539,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s32))) svint32x2_t svcreate2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s64))) svint64x2_t svcreate2_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_mf8))) +svmfloat8x2_t svcreate2_mf8(svmfloat8_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s16))) svint16x2_t svcreate2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u8))) @@ -20577,6 +20551,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u64))) svuint64x3_t svcreate3_u64(svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u16))) svuint16x3_t svcreate3_u16(svuint16_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) +svbfloat16x3_t svcreate3_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s8))) svint8x3_t svcreate3_s8(svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f64))) @@ -20589,6 +20565,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s32))) svint32x3_t svcreate3_s32(svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s64))) svint64x3_t svcreate3_s64(svint64_t, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_mf8))) +svmfloat8x3_t svcreate3_mf8(svmfloat8_t, svmfloat8_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s16))) svint16x3_t svcreate3_s16(svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u8))) @@ -20599,6 +20577,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u64))) svuint64x4_t svcreate4_u64(svuint64_t, svuint64_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u16))) svuint16x4_t svcreate4_u16(svuint16_t, svuint16_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) +svbfloat16x4_t svcreate4_bf16(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s8))) svint8x4_t svcreate4_s8(svint8_t, svint8_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f64))) @@ -20611,6 +20591,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s32))) svint32x4_t svcreate4_s32(svint32_t, svint32_t, svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s64))) svint64x4_t svcreate4_s64(svint64_t, svint64_t, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_mf8))) +svmfloat8x4_t svcreate4_mf8(svmfloat8_t, svmfloat8_t, svmfloat8_t, svmfloat8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s16))) svint16x4_t svcreate4_s16(svint16_t, svint16_t, svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_m))) @@ -21017,6 +20999,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64))) svuint64_t svdup_n_u64(uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16))) svuint16_t svdup_n_u16(uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) +svbfloat16_t svdup_n_bf16(bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8))) svint8_t svdup_n_s8(int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64))) @@ -21039,6 +21023,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_m))) svuint64_t svdup_n_u64_m(svuint64_t, svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_m))) svuint16_t svdup_n_u16_m(svuint16_t, svbool_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) +svbfloat16_t svdup_n_bf16_m(svbfloat16_t, svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_m))) svint8_t svdup_n_s8_m(svint8_t, svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_m))) @@ -21069,6 +21055,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_x))) svuint64_t svdup_n_u64_x(svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_x))) svuint16_t svdup_n_u16_x(svbool_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) +svbfloat16_t svdup_n_bf16_x(svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_x))) svint8_t svdup_n_s8_x(svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_x))) @@ -21091,6 +21079,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_z))) svuint64_t svdup_n_u64_z(svbool_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_z))) svuint16_t svdup_n_u16_z(svbool_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) +svbfloat16_t svdup_n_bf16_z(svbool_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_z))) svint8_t svdup_n_s8_z(svbool_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_z))) @@ -21113,6 +21103,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u64))) svuint64_t svdup_lane_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u16))) svuint16_t svdup_lane_u16(svuint16_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) +svbfloat16_t svdup_lane_bf16(svbfloat16_t, uint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s8))) svint8_t svdup_lane_s8(svint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f64))) @@ -21133,6 +21125,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s8))) svint8_t svdupq_n_s8(int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u16))) svuint16_t 
svdupq_n_u16(uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) +svbfloat16_t svdupq_n_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f16))) svfloat16_t svdupq_n_f16(float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s16))) @@ -21165,6 +21159,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u64))) svuint64_t svdupq_lane_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u16))) svuint16_t svdupq_lane_u16(svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) +svbfloat16_t svdupq_lane_bf16(svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s8))) svint8_t svdupq_lane_s8(svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f64))) @@ -21301,6 +21297,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u64))) svuint64_t svext_u64(svuint64_t, svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u16))) svuint16_t svext_u16(svuint16_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) +svbfloat16_t svext_bf16(svbfloat16_t, svbfloat16_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s8))) svint8_t svext_s8(svint8_t, svint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f64))) @@ -21395,6 +21393,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u64))) svuint64_t svget2_u64(svuint64x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u16))) svuint16_t svget2_u16(svuint16x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) +svbfloat16_t svget2_bf16(svbfloat16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s8))) svint8_t svget2_s8(svint8x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f64))) @@ -21407,6 +21407,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s32))) svint32_t svget2_s32(svint32x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s64))) svint64_t svget2_s64(svint64x2_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_mf8))) +svmfloat8_t svget2_mf8(svmfloat8x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s16))) svint16_t svget2_s16(svint16x2_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u8))) @@ -21417,6 +21419,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u64))) svuint64_t svget3_u64(svuint64x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u16))) svuint16_t svget3_u16(svuint16x3_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) +svbfloat16_t svget3_bf16(svbfloat16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s8))) svint8_t svget3_s8(svint8x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f64))) @@ 
-21429,6 +21433,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s32))) svint32_t svget3_s32(svint32x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s64))) svint64_t svget3_s64(svint64x3_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_mf8))) +svmfloat8_t svget3_mf8(svmfloat8x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s16))) svint16_t svget3_s16(svint16x3_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u8))) @@ -21439,6 +21445,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u64))) svuint64_t svget4_u64(svuint64x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u16))) svuint16_t svget4_u16(svuint16x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) +svbfloat16_t svget4_bf16(svbfloat16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s8))) svint8_t svget4_s8(svint8x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f64))) @@ -21451,6 +21459,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s32))) svint32_t svget4_s32(svint32x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s64))) svint64_t svget4_s64(svint64x4_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_mf8))) +svmfloat8_t svget4_mf8(svmfloat8x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s16))) svint16_t svget4_s16(svint16x4_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svindex_u8))) @@ -21477,6 +21487,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u64))) svuint64_t svinsr_n_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u16))) svuint16_t svinsr_n_u16(svuint16_t, uint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) +svbfloat16_t svinsr_n_bf16(svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s8))) svint8_t svinsr_n_s8(svint8_t, int8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f64))) @@ -21499,6 +21511,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u64))) uint64_t svlasta_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u16))) uint16_t svlasta_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) +bfloat16_t svlasta_bf16(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s8))) int8_t svlasta_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f64))) @@ -21521,6 +21535,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u64))) uint64_t svlastb_u64(svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u16))) uint16_t svlastb_u16(svbool_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) +bfloat16_t svlastb_bf16(svbool_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s8))) int8_t svlastb_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f64))) @@ -21543,6 +21559,8 @@ __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64))) svuint64_t svld1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16))) svuint16_t svld1_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) +svbfloat16_t svld1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8))) svint8_t svld1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64))) @@ -21555,6 +21573,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32))) svint32_t svld1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64))) svint64_t svld1_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8))) +svmfloat8_t svld1_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16))) svint16_t svld1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8))) @@ -21565,6 +21585,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64))) svuint64_t svld1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16))) svuint16_t svld1_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) +svbfloat16_t svld1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8))) svint8_t svld1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64))) @@ -21577,6 +21599,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32))) svint32_t svld1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64))) svint64_t svld1_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8))) +svmfloat8_t svld1_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16))) svint16_t svld1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u8))) @@ -21587,6 +21611,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u64))) svuint64_t svld1rq_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u16))) svuint16_t svld1rq_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) +svbfloat16_t svld1rq_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s8))) svint8_t svld1rq_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f64))) @@ -21599,6 +21625,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s32))) svint32_t svld1rq_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s64))) svint64_t svld1rq_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_mf8))) +svmfloat8_t svld1rq_mf8(svbool_t, mfloat8_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s16))) svint16_t svld1rq_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1sb_vnum_u32))) @@ -21705,6 +21733,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u64))) svuint64x2_t svld2_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u16))) svuint16x2_t svld2_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) +svbfloat16x2_t svld2_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s8))) svint8x2_t svld2_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f64))) @@ -21717,6 +21747,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s32))) svint32x2_t svld2_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s64))) svint64x2_t svld2_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_mf8))) +svmfloat8x2_t svld2_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s16))) svint16x2_t svld2_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u8))) @@ -21727,6 +21759,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u64))) svuint64x2_t svld2_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u16))) svuint16x2_t svld2_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) +svbfloat16x2_t svld2_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s8))) svint8x2_t svld2_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f64))) @@ -21739,6 +21773,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s32))) svint32x2_t svld2_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s64))) svint64x2_t svld2_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_mf8))) +svmfloat8x2_t svld2_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s16))) svint16x2_t svld2_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u8))) @@ -21749,6 +21785,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u64))) svuint64x3_t svld3_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u16))) svuint16x3_t svld3_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) +svbfloat16x3_t svld3_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s8))) svint8x3_t svld3_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f64))) @@ -21761,6 +21799,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s32))) svint32x3_t svld3_s32(svbool_t, int32_t const *); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s64))) svint64x3_t svld3_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_mf8))) +svmfloat8x3_t svld3_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s16))) svint16x3_t svld3_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u8))) @@ -21771,6 +21811,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u64))) svuint64x3_t svld3_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u16))) svuint16x3_t svld3_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) +svbfloat16x3_t svld3_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s8))) svint8x3_t svld3_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f64))) @@ -21783,6 +21825,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s32))) svint32x3_t svld3_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s64))) svint64x3_t svld3_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_mf8))) +svmfloat8x3_t svld3_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s16))) svint16x3_t svld3_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u8))) @@ -21793,6 +21837,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u64))) svuint64x4_t svld4_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u16))) svuint16x4_t svld4_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) +svbfloat16x4_t svld4_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s8))) svint8x4_t svld4_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f64))) @@ -21805,6 +21851,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s32))) svint32x4_t svld4_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s64))) svint64x4_t svld4_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_mf8))) +svmfloat8x4_t svld4_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s16))) svint16x4_t svld4_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u8))) @@ -21815,6 +21863,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u64))) svuint64x4_t svld4_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u16))) svuint16x4_t svld4_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) +svbfloat16x4_t svld4_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s8))) svint8x4_t 
svld4_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f64))) @@ -21827,6 +21877,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s32))) svint32x4_t svld4_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s64))) svint64x4_t svld4_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_mf8))) +svmfloat8x4_t svld4_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s16))) svint16x4_t svld4_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8))) @@ -21837,6 +21889,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64))) svuint64_t svldnt1_u64(svbool_t, uint64_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16))) svuint16_t svldnt1_u16(svbool_t, uint16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) +svbfloat16_t svldnt1_bf16(svbool_t, bfloat16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8))) svint8_t svldnt1_s8(svbool_t, int8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64))) @@ -21849,6 +21903,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32))) svint32_t svldnt1_s32(svbool_t, int32_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64))) svint64_t svldnt1_s64(svbool_t, int64_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8))) +svmfloat8_t svldnt1_mf8(svbool_t, mfloat8_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16))) svint16_t svldnt1_s16(svbool_t, int16_t const *); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8))) @@ -21859,6 +21915,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64))) svuint64_t svldnt1_vnum_u64(svbool_t, uint64_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16))) svuint16_t svldnt1_vnum_u16(svbool_t, uint16_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) +svbfloat16_t svldnt1_vnum_bf16(svbool_t, bfloat16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8))) svint8_t svldnt1_vnum_s8(svbool_t, int8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64))) @@ -21871,6 +21929,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32))) svint32_t svldnt1_vnum_s32(svbool_t, int32_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64))) svint64_t svldnt1_vnum_s64(svbool_t, int64_t const *, int64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8))) +svmfloat8_t svldnt1_vnum_mf8(svbool_t, mfloat8_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16))) svint16_t svldnt1_vnum_s16(svbool_t, int16_t const *, int64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u8))) @@ -21881,6 +21941,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u64))) uint64_t svlen_u64(svuint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u16))) uint64_t svlen_u16(svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) +uint64_t svlen_bf16(svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s8))) uint64_t svlen_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f64))) @@ -24171,6 +24233,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u64))) svuint64_t svrev_u64(svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u16))) svuint16_t svrev_u16(svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) +svbfloat16_t svrev_bf16(svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s8))) svint8_t svrev_s8(svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f64))) @@ -24449,6 +24513,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64))) svuint64_t svsel_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16))) svuint16_t svsel_u16(svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) +svbfloat16_t svsel_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8))) svint8_t svsel_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64))) @@ -24471,6 +24537,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u64))) svuint64x2_t svset2_u64(svuint64x2_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u16))) svuint16x2_t svset2_u16(svuint16x2_t, uint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) +svbfloat16x2_t svset2_bf16(svbfloat16x2_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s8))) svint8x2_t svset2_s8(svint8x2_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f64))) @@ -24483,6 +24551,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s32))) svint32x2_t svset2_s32(svint32x2_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s64))) svint64x2_t svset2_s64(svint64x2_t, uint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_mf8))) +svmfloat8x2_t svset2_mf8(svmfloat8x2_t, uint64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s16))) svint16x2_t svset2_s16(svint16x2_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u8))) @@ -24493,6 +24563,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u64))) svuint64x3_t svset3_u64(svuint64x3_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u16))) svuint16x3_t svset3_u16(svuint16x3_t, uint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) +svbfloat16x3_t svset3_bf16(svbfloat16x3_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s8))) svint8x3_t svset3_s8(svint8x3_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f64))) @@ -24505,6 +24577,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s32))) svint32x3_t 
svset3_s32(svint32x3_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s64))) svint64x3_t svset3_s64(svint64x3_t, uint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_mf8))) +svmfloat8x3_t svset3_mf8(svmfloat8x3_t, uint64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s16))) svint16x3_t svset3_s16(svint16x3_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u8))) @@ -24515,6 +24589,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u64))) svuint64x4_t svset4_u64(svuint64x4_t, uint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u16))) svuint16x4_t svset4_u16(svuint16x4_t, uint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) +svbfloat16x4_t svset4_bf16(svbfloat16x4_t, uint64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s8))) svint8x4_t svset4_s8(svint8x4_t, uint64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f64))) @@ -24527,6 +24603,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s32))) svint32x4_t svset4_s32(svint32x4_t, uint64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s64))) svint64x4_t svset4_s64(svint64x4_t, uint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_mf8))) +svmfloat8x4_t svset4_mf8(svmfloat8x4_t, uint64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s16))) svint16x4_t svset4_s16(svint16x4_t, uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u8))) @@ -24537,6 +24615,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u64))) svuint64_t svsplice_u64(svbool_t, svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u16))) svuint16_t svsplice_u16(svbool_t, svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) +svbfloat16_t svsplice_bf16(svbool_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s8))) svint8_t svsplice_s8(svbool_t, svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f64))) @@ -24577,6 +24657,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64))) void svst1_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16))) void svst1_u16(svbool_t, uint16_t *, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) +void svst1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8))) void svst1_s8(svbool_t, int8_t *, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64))) @@ -24589,6 +24671,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32))) void svst1_s32(svbool_t, int32_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64))) void svst1_s64(svbool_t, int64_t *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8))) +void svst1_mf8(svbool_t, mfloat8_t *, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16))) void svst1_s16(svbool_t, int16_t *, svint16_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8))) @@ -24599,6 +24683,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64))) void svst1_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16))) void svst1_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) +void svst1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8))) void svst1_vnum_s8(svbool_t, int8_t *, int64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64))) @@ -24611,6 +24697,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32))) void svst1_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64))) void svst1_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8))) +void svst1_vnum_mf8(svbool_t, mfloat8_t *, int64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16))) void svst1_vnum_s16(svbool_t, int16_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s32))) @@ -24669,6 +24757,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u64))) void svst2_u64(svbool_t, uint64_t *, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u16))) void svst2_u16(svbool_t, uint16_t *, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) +void svst2_bf16(svbool_t, bfloat16_t *, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s8))) void svst2_s8(svbool_t, int8_t *, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f64))) @@ -24681,6 +24771,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s32))) void svst2_s32(svbool_t, int32_t *, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s64))) void svst2_s64(svbool_t, int64_t *, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_mf8))) +void svst2_mf8(svbool_t, mfloat8_t *, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s16))) void svst2_s16(svbool_t, int16_t *, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u8))) @@ -24691,6 +24783,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u64))) void svst2_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u16))) void svst2_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) +void svst2_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s8))) void svst2_vnum_s8(svbool_t, int8_t *, int64_t, svint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f64))) @@ -24703,6 +24797,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s32))) void svst2_vnum_s32(svbool_t, int32_t *, int64_t, svint32x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s64))) void 
svst2_vnum_s64(svbool_t, int64_t *, int64_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_mf8))) +void svst2_vnum_mf8(svbool_t, mfloat8_t *, int64_t, svmfloat8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s16))) void svst2_vnum_s16(svbool_t, int16_t *, int64_t, svint16x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u8))) @@ -24713,6 +24809,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u64))) void svst3_u64(svbool_t, uint64_t *, svuint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u16))) void svst3_u16(svbool_t, uint16_t *, svuint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) +void svst3_bf16(svbool_t, bfloat16_t *, svbfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s8))) void svst3_s8(svbool_t, int8_t *, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f64))) @@ -24725,6 +24823,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s32))) void svst3_s32(svbool_t, int32_t *, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s64))) void svst3_s64(svbool_t, int64_t *, svint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_mf8))) +void svst3_mf8(svbool_t, mfloat8_t *, svmfloat8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s16))) void svst3_s16(svbool_t, int16_t *, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u8))) @@ -24735,6 +24835,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u64))) void svst3_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u16))) void svst3_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) +void svst3_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s8))) void svst3_vnum_s8(svbool_t, int8_t *, int64_t, svint8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f64))) @@ -24747,6 +24849,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s32))) void svst3_vnum_s32(svbool_t, int32_t *, int64_t, svint32x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s64))) void svst3_vnum_s64(svbool_t, int64_t *, int64_t, svint64x3_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_mf8))) +void svst3_vnum_mf8(svbool_t, mfloat8_t *, int64_t, svmfloat8x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s16))) void svst3_vnum_s16(svbool_t, int16_t *, int64_t, svint16x3_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u8))) @@ -24757,6 +24861,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u64))) void svst4_u64(svbool_t, uint64_t *, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u16))) void svst4_u16(svbool_t, uint16_t *, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) +void svst4_bf16(svbool_t, bfloat16_t *, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s8))) void svst4_s8(svbool_t, int8_t *, svint8x4_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f64))) @@ -24769,6 +24875,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s32))) void svst4_s32(svbool_t, int32_t *, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s64))) void svst4_s64(svbool_t, int64_t *, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_mf8))) +void svst4_mf8(svbool_t, mfloat8_t *, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s16))) void svst4_s16(svbool_t, int16_t *, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u8))) @@ -24779,6 +24887,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u64))) void svst4_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u16))) void svst4_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) +void svst4_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s8))) void svst4_vnum_s8(svbool_t, int8_t *, int64_t, svint8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f64))) @@ -24791,6 +24901,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s32))) void svst4_vnum_s32(svbool_t, int32_t *, int64_t, svint32x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s64))) void svst4_vnum_s64(svbool_t, int64_t *, int64_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_mf8))) +void svst4_vnum_mf8(svbool_t, mfloat8_t *, int64_t, svmfloat8x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s16))) void svst4_vnum_s16(svbool_t, int16_t *, int64_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8))) @@ -24801,6 +24913,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64))) void svstnt1_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16))) void svstnt1_u16(svbool_t, uint16_t *, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) +void svstnt1_bf16(svbool_t, bfloat16_t *, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8))) void svstnt1_s8(svbool_t, int8_t *, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64))) @@ -24813,6 +24927,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32))) void svstnt1_s32(svbool_t, int32_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64))) void svstnt1_s64(svbool_t, int64_t *, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8))) +void svstnt1_mf8(svbool_t, mfloat8_t *, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16))) void svstnt1_s16(svbool_t, int16_t *, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8))) @@ -24823,6 +24939,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64))) void svstnt1_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16))) void svstnt1_vnum_u16(svbool_t, uint16_t *, int64_t, svuint16_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) +void svstnt1_vnum_bf16(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8))) void svstnt1_vnum_s8(svbool_t, int8_t *, int64_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64))) @@ -24835,6 +24953,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32))) void svstnt1_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64))) void svstnt1_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8))) +void svstnt1_vnum_mf8(svbool_t, mfloat8_t *, int64_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16))) void svstnt1_vnum_s16(svbool_t, int16_t *, int64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_m))) @@ -25109,6 +25229,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u64))) svuint64_t svtbl_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u16))) svuint16_t svtbl_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) +svbfloat16_t svtbl_bf16(svbfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s8))) svint8_t svtbl_s8(svint8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f64))) @@ -25131,6 +25253,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u64))) svuint64_t svtrn1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u16))) svuint16_t svtrn1_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) +svbfloat16_t svtrn1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s8))) svint8_t svtrn1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f64))) @@ -25161,6 +25285,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u64))) svuint64_t svtrn2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u16))) svuint16_t svtrn2_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) +svbfloat16_t svtrn2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s8))) svint8_t svtrn2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f64))) @@ -25191,6 +25317,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u64))) svuint64x2_t svundef2_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_u16))) svuint16x2_t svundef2_u16(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_bf16))) +svbfloat16x2_t svundef2_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s8))) svint8x2_t svundef2_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_f64))) @@ -25203,6 +25331,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s32))) svint32x2_t svundef2_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s64))) 
svint64x2_t svundef2_s64(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_mf8))) +svmfloat8x2_t svundef2_mf8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef2_s16))) svint16x2_t svundef2_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u8))) @@ -25213,6 +25343,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u64))) svuint64x3_t svundef3_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_u16))) svuint16x3_t svundef3_u16(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_bf16))) +svbfloat16x3_t svundef3_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s8))) svint8x3_t svundef3_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_f64))) @@ -25225,6 +25357,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s32))) svint32x3_t svundef3_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s64))) svint64x3_t svundef3_s64(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_mf8))) +svmfloat8x3_t svundef3_mf8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef3_s16))) svint16x3_t svundef3_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u8))) @@ -25235,6 +25369,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u64))) svuint64x4_t svundef4_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_u16))) svuint16x4_t svundef4_u16(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_bf16))) +svbfloat16x4_t svundef4_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s8))) svint8x4_t svundef4_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_f64))) @@ -25247,6 +25383,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s32))) svint32x4_t svundef4_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s64))) svint64x4_t svundef4_s64(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_mf8))) +svmfloat8x4_t svundef4_mf8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef4_s16))) svint16x4_t svundef4_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u8))) @@ -25257,6 +25395,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u64))) svuint64_t svundef_u64(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_u16))) svuint16_t svundef_u16(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_bf16))) +svbfloat16_t svundef_bf16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s8))) svint8_t svundef_s8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_f64))) @@ -25269,6 +25409,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s32))) svint32_t svundef_s32(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s64))) svint64_t svundef_s64(void); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_mf8))) +svmfloat8_t svundef_mf8(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svundef_s16))) svint16_t svundef_s16(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svunpkhi_b))) @@ -25307,6 +25449,8 @@ __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u64))) svuint64_t svuzp1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u16))) svuint16_t svuzp1_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) +svbfloat16_t svuzp1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s8))) svint8_t svuzp1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f64))) @@ -25337,6 +25481,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u64))) svuint64_t svuzp2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u16))) svuint16_t svuzp2_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) +svbfloat16_t svuzp2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s8))) svint8_t svuzp2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f64))) @@ -25431,6 +25577,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u64))) svuint64_t svzip1_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u16))) svuint16_t svzip1_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) +svbfloat16_t svzip1_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s8))) svint8_t svzip1_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f64))) @@ -25461,6 +25609,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u64))) svuint64_t svzip2_u64(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u16))) svuint16_t svzip2_u16(svuint16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) +svbfloat16_t svzip2_bf16(svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s8))) svint8_t svzip2_s8(svint8_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f64))) @@ -26219,6 +26369,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u64))) uint64_t svclasta(svbool_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_u16))) uint16_t svclasta(svbool_t, uint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_bf16))) +bfloat16_t svclasta(svbool_t, bfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_s8))) int8_t svclasta(svbool_t, int8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_n_f64))) @@ -26241,6 +26393,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u64))) svuint64_t svclasta(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_u16))) svuint16_t svclasta(svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_bf16))) +svbfloat16_t svclasta(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_s8))) svint8_t svclasta(svbool_t, svint8_t, svint8_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svclasta_f64))) @@ -26263,6 +26417,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u64))) uint64_t svclastb(svbool_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_u16))) uint16_t svclastb(svbool_t, uint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_bf16))) +bfloat16_t svclastb(svbool_t, bfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_s8))) int8_t svclastb(svbool_t, int8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_n_f64))) @@ -26285,6 +26441,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u64))) svuint64_t svclastb(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_u16))) svuint16_t svclastb(svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_bf16))) +svbfloat16_t svclastb(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_s8))) svint8_t svclastb(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclastb_f64))) @@ -26845,6 +27003,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_m))) svuint64_t svcnt_m(svuint64_t, svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_m))) svuint16_t svcnt_m(svuint16_t, svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_m))) +svuint16_t svcnt_m(svuint16_t, svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_m))) svuint8_t svcnt_m(svuint8_t, svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_m))) @@ -26867,6 +27027,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_x))) svuint64_t svcnt_x(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_x))) svuint16_t svcnt_x(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_x))) +svuint16_t svcnt_x(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_x))) svuint8_t svcnt_x(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_x))) @@ -26889,6 +27051,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u64_z))) svuint64_t svcnt_z(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_u16_z))) svuint16_t svcnt_z(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_bf16_z))) +svuint16_t svcnt_z(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_s8_z))) svuint8_t svcnt_z(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcnt_f64_z))) @@ -26911,6 +27075,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u64))) svuint64x2_t svcreate2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_u16))) svuint16x2_t svcreate2(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_bf16))) +svbfloat16x2_t svcreate2(svbfloat16_t, svbfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s8))) svint8x2_t svcreate2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_f64))) @@ -26923,6 +27089,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s32))) svint32x2_t svcreate2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s64))) svint64x2_t svcreate2(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_mf8))) +svmfloat8x2_t svcreate2(svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate2_s16))) svint16x2_t svcreate2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u8))) @@ -26933,6 +27101,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u64))) svuint64x3_t svcreate3(svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_u16))) svuint16x3_t svcreate3(svuint16_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_bf16))) +svbfloat16x3_t svcreate3(svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s8))) svint8x3_t svcreate3(svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_f64))) @@ -26945,6 +27115,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s32))) svint32x3_t svcreate3(svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s64))) svint64x3_t svcreate3(svint64_t, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_mf8))) +svmfloat8x3_t svcreate3(svmfloat8_t, svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate3_s16))) svint16x3_t svcreate3(svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u8))) @@ -26955,6 +27127,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u64))) svuint64x4_t svcreate4(svuint64_t, svuint64_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_u16))) svuint16x4_t svcreate4(svuint16_t, svuint16_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_bf16))) +svbfloat16x4_t svcreate4(svbfloat16_t, svbfloat16_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s8))) svint8x4_t svcreate4(svint8_t, svint8_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_f64))) @@ -26967,6 +27141,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s32))) svint32x4_t svcreate4(svint32_t, svint32_t, svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s64))) svint64x4_t svcreate4(svint64_t, svint64_t, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_mf8))) +svmfloat8x4_t svcreate4(svmfloat8_t, svmfloat8_t, svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcreate4_s16))) svint16x4_t svcreate4(svint16_t, svint16_t, svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_f16_f32_m))) @@ -27373,6 +27549,8 @@ __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64))) svuint64_t svdup_u64(uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16))) svuint16_t svdup_u16(uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16))) +svbfloat16_t svdup_bf16(bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8))) svint8_t svdup_s8(int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64))) @@ -27395,6 +27573,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_m))) svuint64_t svdup_u64_m(svuint64_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_m))) svuint16_t svdup_u16_m(svuint16_t, svbool_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_m))) +svbfloat16_t svdup_bf16_m(svbfloat16_t, svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_m))) svint8_t svdup_s8_m(svint8_t, svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_m))) @@ -27425,6 +27605,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_x))) svuint64_t svdup_u64_x(svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_x))) svuint16_t svdup_u16_x(svbool_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_x))) +svbfloat16_t svdup_bf16_x(svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_x))) svint8_t svdup_s8_x(svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_x))) @@ -27447,6 +27629,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u64_z))) svuint64_t svdup_u64_z(svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_u16_z))) svuint16_t svdup_u16_z(svbool_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_bf16_z))) +svbfloat16_t svdup_bf16_z(svbool_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_s8_z))) svint8_t svdup_s8_z(svbool_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_n_f64_z))) @@ -27469,6 +27653,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u64))) svuint64_t svdup_lane(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_u16))) svuint16_t svdup_lane(svuint16_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_bf16))) +svbfloat16_t svdup_lane(svbfloat16_t, uint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_s8))) svint8_t svdup_lane(svint8_t, uint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_lane_f64))) @@ -27489,6 +27675,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s8))) svint8_t svdupq_s8(int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_u16))) svuint16_t svdupq_u16(uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_bf16))) +svbfloat16_t svdupq_bf16(bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, bfloat16_t, 
bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_f16))) svfloat16_t svdupq_f16(float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t, float16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_n_s16))) @@ -27521,6 +27709,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u64))) svuint64_t svdupq_lane(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_u16))) svuint16_t svdupq_lane(svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_bf16))) +svbfloat16_t svdupq_lane(svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_s8))) svint8_t svdupq_lane(svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdupq_lane_f64))) @@ -27657,6 +27847,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u64))) svuint64_t svext(svuint64_t, svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_u16))) svuint16_t svext(svuint16_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_bf16))) +svbfloat16_t svext(svbfloat16_t, svbfloat16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_s8))) svint8_t svext(svint8_t, svint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svext_f64))) @@ -27751,6 +27943,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u64))) svuint64_t svget2(svuint64x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_u16))) svuint16_t svget2(svuint16x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_bf16))) +svbfloat16_t svget2(svbfloat16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s8))) svint8_t svget2(svint8x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_f64))) @@ -27763,6 +27957,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s32))) svint32_t svget2(svint32x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s64))) svint64_t svget2(svint64x2_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_mf8))) +svmfloat8_t svget2(svmfloat8x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget2_s16))) svint16_t svget2(svint16x2_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u8))) @@ -27773,6 +27969,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u64))) svuint64_t svget3(svuint64x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_u16))) svuint16_t svget3(svuint16x3_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_bf16))) +svbfloat16_t svget3(svbfloat16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s8))) svint8_t svget3(svint8x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_f64))) @@ -27785,6 +27983,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s32))) svint32_t svget3(svint32x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s64))) svint64_t svget3(svint64x3_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_mf8))) +svmfloat8_t 
svget3(svmfloat8x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget3_s16))) svint16_t svget3(svint16x3_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u8))) @@ -27795,6 +27995,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u64))) svuint64_t svget4(svuint64x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_u16))) svuint16_t svget4(svuint16x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_bf16))) +svbfloat16_t svget4(svbfloat16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s8))) svint8_t svget4(svint8x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_f64))) @@ -27807,6 +28009,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s32))) svint32_t svget4(svint32x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s64))) svint64_t svget4(svint64x4_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_mf8))) +svmfloat8_t svget4(svmfloat8x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget4_s16))) svint16_t svget4(svint16x4_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u8))) @@ -27817,6 +28021,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u64))) svuint64_t svinsr(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_u16))) svuint16_t svinsr(svuint16_t, uint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_bf16))) +svbfloat16_t svinsr(svbfloat16_t, bfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_s8))) svint8_t svinsr(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svinsr_n_f64))) @@ -27839,6 +28045,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u64))) uint64_t svlasta(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_u16))) uint16_t svlasta(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_bf16))) +bfloat16_t svlasta(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_s8))) int8_t svlasta(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlasta_f64))) @@ -27861,6 +28069,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u64))) uint64_t svlastb(svbool_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_u16))) uint16_t svlastb(svbool_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_bf16))) +bfloat16_t svlastb(svbool_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_s8))) int8_t svlastb(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlastb_f64))) @@ -27883,6 +28093,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u64))) svuint64_t svld1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_u16))) svuint16_t svld1(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_bf16))) +svbfloat16_t svld1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s8))) svint8_t 
svld1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_f64))) @@ -27895,6 +28107,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s32))) svint32_t svld1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s64))) svint64_t svld1(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_mf8))) +svmfloat8_t svld1(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_s16))) svint16_t svld1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u8))) @@ -27905,6 +28119,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u64))) svuint64_t svld1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_u16))) svuint16_t svld1_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_bf16))) +svbfloat16_t svld1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s8))) svint8_t svld1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_f64))) @@ -27917,6 +28133,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s32))) svint32_t svld1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s64))) svint64_t svld1_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_mf8))) +svmfloat8_t svld1_vnum(svbool_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1_vnum_s16))) svint16_t svld1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u8))) @@ -27927,6 +28145,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u64))) svuint64_t svld1rq(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_u16))) svuint16_t svld1rq(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_bf16))) +svbfloat16_t svld1rq(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s8))) svint8_t svld1rq(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_f64))) @@ -27939,6 +28159,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s32))) svint32_t svld1rq(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s64))) svint64_t svld1rq(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_mf8))) +svmfloat8_t svld1rq(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1rq_s16))) svint16_t svld1rq(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u8))) @@ -27949,6 +28171,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u64))) svuint64x2_t svld2(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_u16))) svuint16x2_t svld2(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_bf16))) +svbfloat16x2_t 
svld2(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s8))) svint8x2_t svld2(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_f64))) @@ -27961,6 +28185,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s32))) svint32x2_t svld2(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s64))) svint64x2_t svld2(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_mf8))) +svmfloat8x2_t svld2(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_s16))) svint16x2_t svld2(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u8))) @@ -27971,6 +28197,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u64))) svuint64x2_t svld2_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_u16))) svuint16x2_t svld2_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_bf16))) +svbfloat16x2_t svld2_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s8))) svint8x2_t svld2_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_f64))) @@ -27983,6 +28211,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s32))) svint32x2_t svld2_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s64))) svint64x2_t svld2_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_mf8))) +svmfloat8x2_t svld2_vnum(svbool_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld2_vnum_s16))) svint16x2_t svld2_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u8))) @@ -27993,6 +28223,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u64))) svuint64x3_t svld3(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_u16))) svuint16x3_t svld3(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_bf16))) +svbfloat16x3_t svld3(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s8))) svint8x3_t svld3(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_f64))) @@ -28005,6 +28237,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s32))) svint32x3_t svld3(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s64))) svint64x3_t svld3(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_mf8))) +svmfloat8x3_t svld3(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_s16))) svint16x3_t svld3(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u8))) @@ -28015,6 +28249,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u64))) svuint64x3_t svld3_vnum(svbool_t, uint64_t const *, int64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_u16))) svuint16x3_t svld3_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_bf16))) +svbfloat16x3_t svld3_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s8))) svint8x3_t svld3_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_f64))) @@ -28027,6 +28263,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s32))) svint32x3_t svld3_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s64))) svint64x3_t svld3_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_mf8))) +svmfloat8x3_t svld3_vnum(svbool_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld3_vnum_s16))) svint16x3_t svld3_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u8))) @@ -28037,6 +28275,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u64))) svuint64x4_t svld4(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_u16))) svuint16x4_t svld4(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_bf16))) +svbfloat16x4_t svld4(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s8))) svint8x4_t svld4(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_f64))) @@ -28049,6 +28289,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s32))) svint32x4_t svld4(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s64))) svint64x4_t svld4(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_mf8))) +svmfloat8x4_t svld4(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_s16))) svint16x4_t svld4(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u8))) @@ -28059,6 +28301,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u64))) svuint64x4_t svld4_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_u16))) svuint16x4_t svld4_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_bf16))) +svbfloat16x4_t svld4_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s8))) svint8x4_t svld4_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_f64))) @@ -28071,6 +28315,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s32))) svint32x4_t svld4_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s64))) svint64x4_t svld4_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_mf8))) +svmfloat8x4_t svld4_vnum(svbool_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld4_vnum_s16))) svint16x4_t 
svld4_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u8))) @@ -28081,6 +28327,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u64))) svuint64_t svldnt1(svbool_t, uint64_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_u16))) svuint16_t svldnt1(svbool_t, uint16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_bf16))) +svbfloat16_t svldnt1(svbool_t, bfloat16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s8))) svint8_t svldnt1(svbool_t, int8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_f64))) @@ -28093,6 +28341,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s32))) svint32_t svldnt1(svbool_t, int32_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s64))) svint64_t svldnt1(svbool_t, int64_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_mf8))) +svmfloat8_t svldnt1(svbool_t, mfloat8_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_s16))) svint16_t svldnt1(svbool_t, int16_t const *); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u8))) @@ -28103,6 +28353,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u64))) svuint64_t svldnt1_vnum(svbool_t, uint64_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_u16))) svuint16_t svldnt1_vnum(svbool_t, uint16_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_bf16))) +svbfloat16_t svldnt1_vnum(svbool_t, bfloat16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s8))) svint8_t svldnt1_vnum(svbool_t, int8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_f64))) @@ -28115,6 +28367,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s32))) svint32_t svldnt1_vnum(svbool_t, int32_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s64))) svint64_t svldnt1_vnum(svbool_t, int64_t const *, int64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_mf8))) +svmfloat8_t svldnt1_vnum(svbool_t, mfloat8_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svldnt1_vnum_s16))) svint16_t svldnt1_vnum(svbool_t, int16_t const *, int64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u8))) @@ -28125,6 +28379,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u64))) uint64_t svlen(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_u16))) uint64_t svlen(svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_bf16))) +uint64_t svlen(svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_s8))) uint64_t svlen(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svlen_f64))) @@ -30369,6 +30625,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u64))) svuint64_t svrev(svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_u16))) svuint16_t svrev(svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_bf16))) +svbfloat16_t svrev(svbfloat16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_s8))) svint8_t svrev(svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svrev_f64))) @@ -30639,6 +30897,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u64))) svuint64_t svsel(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_u16))) svuint16_t svsel(svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_bf16))) +svbfloat16_t svsel(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_s8))) svint8_t svsel(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsel_f64))) @@ -30661,6 +30921,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u64))) svuint64x2_t svset2(svuint64x2_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_u16))) svuint16x2_t svset2(svuint16x2_t, uint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_bf16))) +svbfloat16x2_t svset2(svbfloat16x2_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s8))) svint8x2_t svset2(svint8x2_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_f64))) @@ -30673,6 +30935,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s32))) svint32x2_t svset2(svint32x2_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s64))) svint64x2_t svset2(svint64x2_t, uint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_mf8))) +svmfloat8x2_t svset2(svmfloat8x2_t, uint64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset2_s16))) svint16x2_t svset2(svint16x2_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u8))) @@ -30683,6 +30947,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u64))) svuint64x3_t svset3(svuint64x3_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_u16))) svuint16x3_t svset3(svuint16x3_t, uint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_bf16))) +svbfloat16x3_t svset3(svbfloat16x3_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s8))) svint8x3_t svset3(svint8x3_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_f64))) @@ -30695,6 +30961,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s32))) svint32x3_t svset3(svint32x3_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s64))) svint64x3_t svset3(svint64x3_t, uint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_mf8))) +svmfloat8x3_t svset3(svmfloat8x3_t, uint64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset3_s16))) svint16x3_t svset3(svint16x3_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u8))) @@ -30705,6 +30973,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u64))) svuint64x4_t svset4(svuint64x4_t, uint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_u16))) svuint16x4_t svset4(svuint16x4_t, uint64_t, 
svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_bf16))) +svbfloat16x4_t svset4(svbfloat16x4_t, uint64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s8))) svint8x4_t svset4(svint8x4_t, uint64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_f64))) @@ -30717,6 +30987,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s32))) svint32x4_t svset4(svint32x4_t, uint64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s64))) svint64x4_t svset4(svint64x4_t, uint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_mf8))) +svmfloat8x4_t svset4(svmfloat8x4_t, uint64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset4_s16))) svint16x4_t svset4(svint16x4_t, uint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u8))) @@ -30727,6 +30999,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u64))) svuint64_t svsplice(svbool_t, svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_u16))) svuint16_t svsplice(svbool_t, svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_bf16))) +svbfloat16_t svsplice(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_s8))) svint8_t svsplice(svbool_t, svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsplice_f64))) @@ -30767,6 +31041,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u64))) void svst1(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_u16))) void svst1(svbool_t, uint16_t *, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_bf16))) +void svst1(svbool_t, bfloat16_t *, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s8))) void svst1(svbool_t, int8_t *, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_f64))) @@ -30779,6 +31055,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s32))) void svst1(svbool_t, int32_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s64))) void svst1(svbool_t, int64_t *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_mf8))) +void svst1(svbool_t, mfloat8_t *, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_s16))) void svst1(svbool_t, int16_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u8))) @@ -30789,6 +31067,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u64))) void svst1_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_u16))) void svst1_vnum(svbool_t, uint16_t *, int64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_bf16))) +void svst1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s8))) void svst1_vnum(svbool_t, int8_t *, int64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_f64))) @@ -30801,6 +31081,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s32))) void 
svst1_vnum(svbool_t, int32_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s64))) void svst1_vnum(svbool_t, int64_t *, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_mf8))) +void svst1_vnum(svbool_t, mfloat8_t *, int64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1_vnum_s16))) void svst1_vnum(svbool_t, int16_t *, int64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1b_s32))) @@ -30859,6 +31141,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u64))) void svst2(svbool_t, uint64_t *, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_u16))) void svst2(svbool_t, uint16_t *, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_bf16))) +void svst2(svbool_t, bfloat16_t *, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s8))) void svst2(svbool_t, int8_t *, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_f64))) @@ -30871,6 +31155,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s32))) void svst2(svbool_t, int32_t *, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s64))) void svst2(svbool_t, int64_t *, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_mf8))) +void svst2(svbool_t, mfloat8_t *, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_s16))) void svst2(svbool_t, int16_t *, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u8))) @@ -30881,6 +31167,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u64))) void svst2_vnum(svbool_t, uint64_t *, int64_t, svuint64x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_u16))) void svst2_vnum(svbool_t, uint16_t *, int64_t, svuint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_bf16))) +void svst2_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s8))) void svst2_vnum(svbool_t, int8_t *, int64_t, svint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_f64))) @@ -30893,6 +31181,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s32))) void svst2_vnum(svbool_t, int32_t *, int64_t, svint32x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s64))) void svst2_vnum(svbool_t, int64_t *, int64_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_mf8))) +void svst2_vnum(svbool_t, mfloat8_t *, int64_t, svmfloat8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2_vnum_s16))) void svst2_vnum(svbool_t, int16_t *, int64_t, svint16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u8))) @@ -30903,6 +31193,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u64))) void svst3(svbool_t, uint64_t *, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_u16))) void svst3(svbool_t, uint16_t *, svuint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_bf16))) +void svst3(svbool_t, bfloat16_t *, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s8))) void svst3(svbool_t, int8_t *, 
svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_f64))) @@ -30915,6 +31207,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s32))) void svst3(svbool_t, int32_t *, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s64))) void svst3(svbool_t, int64_t *, svint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_mf8))) +void svst3(svbool_t, mfloat8_t *, svmfloat8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_s16))) void svst3(svbool_t, int16_t *, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u8))) @@ -30925,6 +31219,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u64))) void svst3_vnum(svbool_t, uint64_t *, int64_t, svuint64x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_u16))) void svst3_vnum(svbool_t, uint16_t *, int64_t, svuint16x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_bf16))) +void svst3_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s8))) void svst3_vnum(svbool_t, int8_t *, int64_t, svint8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_f64))) @@ -30937,6 +31233,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s32))) void svst3_vnum(svbool_t, int32_t *, int64_t, svint32x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s64))) void svst3_vnum(svbool_t, int64_t *, int64_t, svint64x3_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_mf8))) +void svst3_vnum(svbool_t, mfloat8_t *, int64_t, svmfloat8x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst3_vnum_s16))) void svst3_vnum(svbool_t, int16_t *, int64_t, svint16x3_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u8))) @@ -30947,6 +31245,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u64))) void svst4(svbool_t, uint64_t *, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_u16))) void svst4(svbool_t, uint16_t *, svuint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_bf16))) +void svst4(svbool_t, bfloat16_t *, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s8))) void svst4(svbool_t, int8_t *, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_f64))) @@ -30959,6 +31259,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s32))) void svst4(svbool_t, int32_t *, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s64))) void svst4(svbool_t, int64_t *, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_mf8))) +void svst4(svbool_t, mfloat8_t *, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_s16))) void svst4(svbool_t, int16_t *, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u8))) @@ -30969,6 +31271,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u64))) void svst4_vnum(svbool_t, uint64_t *, int64_t, svuint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_u16))) void svst4_vnum(svbool_t, uint16_t *, int64_t, svuint16x4_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_bf16))) +void svst4_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s8))) void svst4_vnum(svbool_t, int8_t *, int64_t, svint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_f64))) @@ -30981,6 +31285,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s32))) void svst4_vnum(svbool_t, int32_t *, int64_t, svint32x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s64))) void svst4_vnum(svbool_t, int64_t *, int64_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_mf8))) +void svst4_vnum(svbool_t, mfloat8_t *, int64_t, svmfloat8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst4_vnum_s16))) void svst4_vnum(svbool_t, int16_t *, int64_t, svint16x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u8))) @@ -30991,6 +31297,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u64))) void svstnt1(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_u16))) void svstnt1(svbool_t, uint16_t *, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_bf16))) +void svstnt1(svbool_t, bfloat16_t *, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s8))) void svstnt1(svbool_t, int8_t *, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_f64))) @@ -31003,6 +31311,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s32))) void svstnt1(svbool_t, int32_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s64))) void svstnt1(svbool_t, int64_t *, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_mf8))) +void svstnt1(svbool_t, mfloat8_t *, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_s16))) void svstnt1(svbool_t, int16_t *, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u8))) @@ -31013,6 +31323,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u64))) void svstnt1_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_u16))) void svstnt1_vnum(svbool_t, uint16_t *, int64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_bf16))) +void svstnt1_vnum(svbool_t, bfloat16_t *, int64_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s8))) void svstnt1_vnum(svbool_t, int8_t *, int64_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_f64))) @@ -31025,6 +31337,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s32))) void svstnt1_vnum(svbool_t, int32_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s64))) void svstnt1_vnum(svbool_t, int64_t *, int64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_mf8))) +void svstnt1_vnum(svbool_t, mfloat8_t *, int64_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1_vnum_s16))) void svstnt1_vnum(svbool_t, int16_t *, int64_t, svint16_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_n_f64_m))) @@ -31299,6 +31613,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u64))) svuint64_t svtbl(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_u16))) svuint16_t svtbl(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_bf16))) +svbfloat16_t svtbl(svbfloat16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_s8))) svint8_t svtbl(svint8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl_f64))) @@ -31321,6 +31637,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u64))) svuint64_t svtrn1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_u16))) svuint16_t svtrn1(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_bf16))) +svbfloat16_t svtrn1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_s8))) svint8_t svtrn1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn1_f64))) @@ -31343,6 +31661,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u64))) svuint64_t svtrn2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_u16))) svuint16_t svtrn2(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_bf16))) +svbfloat16_t svtrn2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_s8))) svint8_t svtrn2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtrn2_f64))) @@ -31393,6 +31713,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u64))) svuint64_t svuzp1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_u16))) svuint16_t svuzp1(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_bf16))) +svbfloat16_t svuzp1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_s8))) svint8_t svuzp1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp1_f64))) @@ -31415,6 +31737,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u64))) svuint64_t svuzp2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_u16))) svuint16_t svuzp2(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_bf16))) +svbfloat16_t svuzp2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_s8))) svint8_t svuzp2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzp2_f64))) @@ -31501,6 +31825,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u64))) svuint64_t svzip1(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_u16))) svuint16_t svzip1(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_bf16))) +svbfloat16_t svzip1(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_s8))) svint8_t svzip1(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip1_f64))) @@ -31523,6 +31849,8 @@ __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u64))) svuint64_t svzip2(svuint64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_u16))) svuint16_t svzip2(svuint16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_bf16))) +svbfloat16_t svzip2(svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s8))) svint8_t svzip2(svint8_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_f64))) @@ -31537,6 +31865,18 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s64))) svint64_t svzip2(svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzip2_s16))) svint16_t svzip2(svint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) +svfloat64_t svexpa_f64(svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) +svfloat32_t svexpa_f32(svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) +svfloat16_t svexpa_f16(svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f64))) +svfloat64_t svexpa(svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f32))) +svfloat32_t svexpa(svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svexpa_f16))) +svfloat16_t svexpa(svuint16_t); #define svcvtnt_bf16_x svcvtnt_bf16_m #define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m #define svcvtnt_f16_x svcvtnt_f16_m diff --git a/lib/include/avx10_2_512bf16intrin.h b/lib/include/avx10_2_512bf16intrin.h index ce43ecbcfe..75290d22ef 100644 --- a/lib/include/avx10_2_512bf16intrin.h +++ b/lib/include/avx10_2_512bf16intrin.h @@ -441,8 +441,8 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -469,8 +469,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B, + -(__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -497,8 +497,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, - (__v32bf)__C); + return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, + (__v32bf)__C); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh( @@ -527,8 +527,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { - return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, - -(__v32bf)__C); + return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B, + -(__v32bf)__C); } static __inline__ __m512bh 
__DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh( diff --git a/lib/include/avx10_2_512convertintrin.h b/lib/include/avx10_2_512convertintrin.h index 0b5fca5cda..ee8cbf28ca 100644 --- a/lib/include/avx10_2_512convertintrin.h +++ b/lib/include/avx10_2_512convertintrin.h @@ -78,20 +78,20 @@ _mm512_maskz_cvtbiasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiassph_bf8(__m512i __A, __m512h __B) { +_mm512_cvts_biasph_bf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_bf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_bf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiassph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvts_biasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); @@ -118,20 +118,20 @@ _mm512_maskz_cvtbiasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiassph_hf8(__m512i __A, __m512h __B) { +_mm512_cvts_biasph_hf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_hf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_hf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiassph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvts_biasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); @@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) { +_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A), (__v32hf)(__B)); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W); + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } @@ -195,37 +195,37 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, 
__m512h __A, __m512h __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) { +_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A), (__v32hf)(__B)); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W); + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvthf8(__m256i __A) { +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvthf8_ph(__m256i __A) { return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 -_mm512_mask_cvthf8(__m512h __W, __mmask32 __U, __m256i __A) { +_mm512_mask_cvthf8_ph(__m512h __W, __mmask32 __U, __m256i __A) { return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvthf8(__mmask32 __U, __m256i __A) { +_mm512_maskz_cvthf8_ph(__mmask32 __U, __m256i __A) { return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U); } @@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) { (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvts_ph_bf8(__m512h __A) { return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { +_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) { +_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) { return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } @@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) { (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvts_ph_hf8(__m512h __A) { return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { +_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { 
return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) { +_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) { return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } diff --git a/lib/include/avx10_2_512satcvtdsintrin.h b/lib/include/avx10_2_512satcvtdsintrin.h index 5970ab0331..012a6282b5 100644 --- a/lib/include/avx10_2_512satcvtdsintrin.h +++ b/lib/include/avx10_2_512satcvtdsintrin.h @@ -20,20 +20,21 @@ __min_vector_width__(512))) // 512 bit : Double -> Int -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epi32(__m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) { (const int)(__R))) // 512 bit : Double -> uInt -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epu32(__m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) { // 512 bit : Double -> Long -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epi64(__m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) { return 
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) { // 512 bit : Double -> ULong -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epu64(__m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) { (const int)(__R))) // 512 bit: Float -> int -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) { +_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) { +_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) { (__mmask16)(__U), (const int)(__R))) // 512 bit: Float -> uint -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) { +_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) { +_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) { (__mmask16)(__U), (const int)(__R))) // 512 bit : float -> long -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS 
_mm512_cvtts_ps_epi64(__m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) { +_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) { +_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) { (const int)(__R))) // 512 bit : float -> ulong -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) { +_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) { +_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); diff --git a/lib/include/avx10_2_512satcvtintrin.h b/lib/include/avx10_2_512satcvtintrin.h index 7f41deb521..b58e3db895 100644 --- a/lib/include/avx10_2_512satcvtintrin.h +++ b/lib/include/avx10_2_512satcvtintrin.h @@ -14,286 +14,286 @@ #ifndef __AVX10_2_512SATCVTINTRIN_H #define __AVX10_2_512SATCVTINTRIN_H -#define _mm512_ipcvtbf16_epi8(A) \ +#define _mm512_ipcvts_bf16_epi8(A) \ ((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvtbf16_epi8(W, U, A) \ +#define _mm512_mask_ipcvts_bf16_epi8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epi8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epi8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvtbf16_epi8(U, A) \ +#define _mm512_maskz_ipcvts_bf16_epi8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epi8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epi8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvtbf16_epu8(A) \ +#define _mm512_ipcvts_bf16_epu8(A) \ ((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvtbf16_epu8(W, U, A) \ +#define _mm512_mask_ipcvts_bf16_epu8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epu8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epu8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvtbf16_epu8(U, A) \ +#define _mm512_maskz_ipcvts_bf16_epu8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epu8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epu8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvttbf16_epi8(A) \ +#define _mm512_ipcvtts_bf16_epi8(A) \ 
((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvttbf16_epi8(W, U, A) \ +#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epi8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvttbf16_epi8(U, A) \ +#define _mm512_maskz_ipcvtts_bf16_epi8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epi8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvttbf16_epu8(A) \ +#define _mm512_ipcvtts_bf16_epu8(A) \ ((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvttbf16_epu8(W, U, A) \ +#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epu8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvttbf16_epu8(U, A) \ +#define _mm512_maskz_ipcvtts_bf16_epu8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epu8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvtph_epi8(A) \ +#define _mm512_ipcvts_ph_epi8(A) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtph_epi8(W, U, A) \ +#define _mm512_mask_ipcvts_ph_epi8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtph_epi8(U, A) \ +#define _mm512_maskz_ipcvts_ph_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundph_epi8(A, R) \ +#define _mm512_ipcvts_roundph_epi8(A, R) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)_mm512_setzero_si512(), \ - (__mmask32)-1, (const int)R)) + (__mmask32) - 1, (const int)R)) -#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)_mm512_setzero_si512(), \ (__mmask32)(U), (const int)R)) -#define _mm512_ipcvtph_epu8(A) \ +#define _mm512_ipcvts_ph_epu8(A) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtph_epu8(W, U, A) \ +#define _mm512_mask_ipcvts_ph_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtph_epu8(U, A) \ +#define _mm512_maskz_ipcvts_ph_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundph_epu8(A, R) \ +#define 
_mm512_ipcvts_roundph_epu8(A, R) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ (const int)R)) -#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ (const int)R)) -#define _mm512_ipcvtps_epi8(A) \ +#define _mm512_ipcvts_ps_epi8(A) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ - (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtps_epi8(W, U, A) \ +#define _mm512_mask_ipcvts_ps_epi8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtps_epi8(U, A) \ +#define _mm512_maskz_ipcvts_ps_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundps_epi8(A, R) \ +#define _mm512_ipcvts_roundps_epi8(A, R) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ (__v16su)_mm512_setzero_si512(), \ - (__mmask16)-1, (const int)R)) + (__mmask16) - 1, (const int)R)) -#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ (__v16su)_mm512_setzero_si512(), \ (__mmask16)(U), (const int)R)) -#define _mm512_ipcvtps_epu8(A) \ +#define _mm512_ipcvts_ps_epu8(A) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ - (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtps_epu8(W, U, A) \ +#define _mm512_mask_ipcvts_ps_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtps_epu8(U, A) \ +#define _mm512_maskz_ipcvts_ps_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundps_epu8(A, R) \ +#define _mm512_ipcvts_roundps_epu8(A, R) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ - (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ (const int)R)) -#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundps_epu8(U, 
A, R) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ (const int)R)) -#define _mm512_ipcvttph_epi8(A) \ +#define _mm512_ipcvtts_ph_epi8(A) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttph_epi8(W, U, A) \ +#define _mm512_mask_ipcvtts_ph_epi8(W, U, A) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttph_epi8(U, A) \ +#define _mm512_maskz_ipcvtts_ph_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundph_epi8(A, S) \ +#define _mm512_ipcvtts_roundph_epi8(A, S) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, S) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) -#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ S)) -#define _mm512_ipcvttph_epu8(A) \ +#define _mm512_ipcvtts_ph_epu8(A) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttph_epu8(W, U, A) \ +#define _mm512_mask_ipcvtts_ph_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttph_epu8(U, A) \ +#define _mm512_maskz_ipcvtts_ph_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundph_epu8(A, S) \ +#define _mm512_ipcvtts_roundph_epu8(A, S) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, S) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) -#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ S)) -#define _mm512_ipcvttps_epi8(A) \ +#define _mm512_ipcvtts_ps_epi8(A) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ - (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttps_epi8(W, U, A) \ +#define _mm512_mask_ipcvtts_ps_epi8(W, U, A) \ 
((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttps_epi8(U, A) \ +#define _mm512_maskz_ipcvtts_ps_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundps_epi8(A, S) \ +#define _mm512_ipcvtts_roundps_epi8(A, S) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ - (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) -#define _mm512_maskz_ipcvtt_roundps_epi8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ S)) -#define _mm512_ipcvttps_epu8(A) \ +#define _mm512_ipcvtts_ps_epu8(A) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ - (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttps_epu8(W, U, A) \ +#define _mm512_mask_ipcvtts_ps_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask((__v16sf)(__m512h)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttps_epu8(U, A) \ +#define _mm512_maskz_ipcvtts_ps_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundps_epu8(A, S) \ +#define _mm512_ipcvtts_roundps_epu8(A, S) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ - (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) -#define _mm512_maskz_ipcvtt_roundps_epu8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ S)) diff --git a/lib/include/avx10_2bf16intrin.h b/lib/include/avx10_2bf16intrin.h index 199cc13ff7..66797ae00f 100644 --- a/lib/include/avx10_2bf16intrin.h +++ b/lib/include/avx10_2bf16intrin.h @@ -852,8 +852,8 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -880,8 +880,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, - -(__v16bf)__C); + return 
(__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -908,8 +908,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, - (__v16bf)__C); + return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, + (__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh( @@ -938,8 +938,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh( static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { - return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, - -(__v16bf)__C); + return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B, + -(__v16bf)__C); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh( @@ -969,8 +969,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh( static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -997,8 +997,8 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1025,8 +1025,8 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, - (__v8bf)__C); + return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, + (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -1053,8 +1053,8 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { - return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, - -(__v8bf)__C); + return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B, + -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 diff --git a/lib/include/avx10_2convertintrin.h b/lib/include/avx10_2convertintrin.h index c67a5b890f..19d91d41f7 100644 --- a/lib/include/avx10_2convertintrin.h +++ b/lib/include/avx10_2convertintrin.h @@ -24,71 +24,351 @@ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ __min_vector_width__(256))) +// clang-format off + +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower 4 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 128-bit vector +/// containing FP16 elements. Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// IF i < 4 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 4]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. 
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( - (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1), - _MM_FROUND_CUR_DIRECTION); + (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1)); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Merging mask \a __U is used to determine if given +/// element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( - (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U, - _MM_FROUND_CUR_DIRECTION); + (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed +/// single-precision (32-bit) floating-point elements to a 256-bit vector +/// containing FP16 elements. Zeroing mask \a __U is used to determine if given +/// element should be zeroed instead. 
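The masked variants follow the usual AVX-512 write-mask convention; for example a zeroing mask of 0x00FF keeps only the eight lanes converted from __B. A hedged sketch (mask and data values are arbitrary, same build assumptions as above):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256 a = _mm256_set1_ps(2.5f);                 /* would fill fp16[8..15] */
  __m256 b = _mm256_set1_ps(1.5f);                 /* fills fp16[0..7] */
  /* keep only the lanes that came from __B, zero the rest */
  __m256h r = _mm256_maskz_cvtx2ps_ph((__mmask16)0x00FF, a, b);
  _Float16 out[16];
  _mm256_storeu_ph(out, r);
  for (int i = 0; i < 16; i++)
    printf("%g ", (double)out[i]);   /* expected: eight 1.5 values, then eight 0 */
  printf("\n");
  return 0;
}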
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.fp16[i] := convert_fp32_to_fp16(__B.fp32[i]) +/// ELSE +/// dst.fp16[i] := convert_fp32_to_fp16(__A.fp32[i - 8]) +/// FI +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PS2PHX instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Lower elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then zero is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( - (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, - _MM_FROUND_CUR_DIRECTION); + (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } -#define _mm256_cvtx_round2ps_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)(-1), (const int)(R))) - -#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R))) - -#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()), \ - (__mmask16)(U), (const int)(R))) - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A, - __m128h __B) { +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. 
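_mm_cvtbiasph_bf8, introduced just above, takes the per-element conversion bias in the low byte of each 16-bit lane of __A and the FP16 data in __B; the bf8 result occupies the low 64 bits of the returned vector. The sketch below only demonstrates the calling convention (zero biases, arbitrary inputs) and prints the raw E5M2 bytes rather than interpreting them:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  _Float16 src[8] = {0.5f, 1.0f, 1.5f, 2.0f, -0.25f, 3.0f, 8.0f, -1.0f};
  __m128h  ph   = _mm_loadu_ph(src);
  __m128i  bias = _mm_set1_epi16(0);      /* low byte of each 16-bit lane is the bias */
  __m128i  bf8  = _mm_cvtbiasph_bf8(bias, ph);
  unsigned char out[16];
  _mm_storeu_si128((__m128i *)out, bf8);
  for (int i = 0; i < 8; i++)             /* the upper 8 result bytes are zeroed */
    printf("%02x ", out[i]);
  printf("\n");
  return 0;
}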
+/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( @@ -96,6 +376,29 @@ _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( @@ -103,12 +406,76 @@ _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( @@ -116,57 +483,324 @@ _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. 
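Note that the 256-bit bias conversions still return a 128-bit vector: sixteen FP16 inputs narrow to sixteen bf8 bytes, as the \returns text above says. A sketch with zero biases and made-up inputs:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  _Float16 src[16];
  for (int i = 0; i < 16; i++)
    src[i] = (_Float16)(0.5f * (float)(i + 1));
  __m256h ph   = _mm256_loadu_ph(src);
  __m256i bias = _mm256_set1_epi16(0);
  __m128i bf8  = _mm256_cvtbiasph_bf8(bias, ph);   /* 256-bit inputs, 128-bit result */
  unsigned char out[16];
  _mm_storeu_si128((__m128i *)out, bf8);
  for (int i = 0; i < 16; i++)
    printf("%02x ", out[i]);
  printf("\n");
  return 0;
}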
static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { +_mm_cvts_biasph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { - return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( - (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); -} +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvts_biasph_bf8(__m128i + __W, __mmask8 __U, __m128i __A, __m128h __B) { return + (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, + (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvts_biasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } + +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. 
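This hunk also renames the saturating bias conversions: _mm_cvtbiassph_bf8 becomes _mm_cvts_biasph_bf8, and the mask, maskz, 256-bit and hf8 forms below follow the same pattern. Code written against the old spellings has to adopt the new names; one possible, purely illustrative transition shim, assuming the rename ships with Clang 21 (the compat_* macro names are hypothetical):

#include <immintrin.h>

/* Illustrative only: forward the pre-LLVM-21 spellings to whichever name the
 * installed headers provide, so one code base can build against both. */
#if defined(__clang_major__) && __clang_major__ >= 21
  #define compat_mm_cvtbiassph_bf8(A, B)     _mm_cvts_biasph_bf8((A), (B))
  #define compat_mm256_cvtbiassph_bf8(A, B)  _mm256_cvts_biasph_bf8((A), (B))
#else
  #define compat_mm_cvtbiassph_bf8(A, B)     _mm_cvtbiassph_bf8((A), (B))
  #define compat_mm256_cvtbiassph_bf8(A, B)  _mm256_cvtbiassph_bf8((A), (B))
#endif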
Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { +_mm256_cvts_biasph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8( +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts_biasph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E5M2 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. 
static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { +_mm256_maskz_cvts_biasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A, - __m128h __B) { +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiasph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. 
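The hf8 bias conversions mirror the bf8 ones but target the E4M3 encoding; the operand order (bias vector first, FP16 data second) is the same. A sketch converting the same inputs both ways so the two byte patterns can be compared side by side (zero biases, arbitrary values):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  _Float16 src[8] = {0.5f, 1.0f, 1.5f, 2.0f, 2.5f, 3.0f, 3.5f, 4.0f};
  __m128h ph   = _mm_loadu_ph(src);
  __m128i bias = _mm_set1_epi16(0);
  unsigned char bf8[16], hf8[16];
  _mm_storeu_si128((__m128i *)bf8, _mm_cvtbiasph_bf8(bias, ph));  /* E5M2 */
  _mm_storeu_si128((__m128i *)hf8, _mm_cvtbiasph_hf8(bias, ph));  /* E4M3 */
  for (int i = 0; i < 8; i++)
    printf("%4.1f -> bf8 %02x / hf8 %02x\n", (double)src[i], bf8[i], hf8[i]);
  return 0;
}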
+/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( @@ -174,6 +808,29 @@ _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x half]. +/// \param __B +/// A 256-bit vector of [16 x i16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( @@ -181,12 +838,76 @@ _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { (__mmask16)-1); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Zeroing mask \a __U is used to determine if +/// given element should be taken zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8 instruction. 
+/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x half]. +/// \param __B +/// A 256-bit vector of [16 x i16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( @@ -194,56 +915,335 @@ _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { (__mmask16)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S`instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B using biases from \a __A; higher order +/// elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { +_mm_cvts_biasph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { +_mm_mask_cvts_biasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x int16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// converted elements from \a __B, using biases from \a __A; higher order +/// elements are zeroed. If corresponding mask bit is not set, then element +/// is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvts_biasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the +/// converted elements from \a __B using biases from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { +_mm256_cvts_biasph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8( +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Merging mask \a __U +/// is used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element from \a __W is taken instead. 
+static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts_biasph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __B containing packed FP16 floating-point elements +/// to FP8 E4M3 numbers, using conversion biases stored in lower 8 bits of each +/// 16-bit integer stored in \a __B. Results are saturated. Zeroing mask \a __U +/// is used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_with_bias_saturate(__A.int8[2 * i], __B.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTBIASPH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x int16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Elements correspond to the converted +/// elements from \a __B, using biases from \a __A. If corresponding mask bit +/// is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { +_mm256_maskz_cvts_biasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B)); + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), + (__v8hf)(__B)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. 
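The 2ph forms pack two full FP16 vectors into one FP8 result, with __B supplying the low half and __A the high half, mirroring _mm_cvtx2ps_ph earlier in the header. An illustrative sketch that just prints the raw bf8 bytes:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  _Float16 lo[8] = {1, 2, 3, 4, 5, 6, 7, 8};        /* becomes bf8[0..7]  */
  _Float16 hi[8] = {9, 10, 11, 12, 13, 14, 15, 16}; /* becomes bf8[8..15] */
  __m128i r = _mm_cvt2ph_bf8(_mm_loadu_ph(hi), _mm_loadu_ph(lo));
  unsigned char out[16];
  _mm_storeu_si128((__m128i *)out, r);
  for (int i = 0; i < 16; i++)
    printf("%02x ", out[i]);
  printf("\n");
  return 0;
}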
+/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -251,73 +1251,481 @@ _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A, - __m256h __B) { - return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), - (__v16hf)(__B)); -} - +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { - return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W); +_mm256_cvt2ph_bf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), + (__v16hf)(__B)); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. 
+/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x bf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvt2ph_bf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8 instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), + (__mmask32)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B)); +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. 
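The 256-bit masked forms in this hunk also correct the mask cast passed to __builtin_ia32_selectb_256 from __mmask16 to __mmask32, so bits 16..31 of the write mask are no longer dropped. A sketch that exercises one of those upper mask bits (the mask value and data are arbitrary):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  _Float16 buf[16];
  for (int i = 0; i < 16; i++)
    buf[i] = (_Float16)(i + 1);
  __m256h v = _mm256_loadu_ph(buf);
  /* bit 20 selects a lane converted from __A; the old __mmask16 cast always dropped it */
  __mmask32 keep = (__mmask32)1 << 20;
  __m256i r = _mm256_maskz_cvt2ph_bf8(keep, v, v);
  unsigned char out[32];
  _mm256_storeu_si256((__m256i *)out, r);
  for (int i = 0; i < 32; i++)
    printf("%02x%s", out[i], i == 31 ? "\n" : " ");
  return 0;
}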
+/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvts_2ph_bf8(__m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), + (__v8hf)(__B)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvts_2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_bf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvts_2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_bf8(__A, __B), (__v16qi)(__m128i)_mm_setzero_si128()); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { +_mm256_cvts_2ph_bf8(__m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A), - (__v16hf)(__B)); + (__v16hf)(__B)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x bf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts_2ph_bf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W); + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_bf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E5M2 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. 
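The cvts_2ph forms saturate results that overflow the E5M2 range, whereas the plain cvt2ph forms follow the ordinary overflow behaviour; the exact byte encodings are not spelled out in this header, so the sketch below simply prints the first result byte of each for a deliberately large input and leaves the interpretation to the reader:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* 60000 fits in FP16 but exceeds the largest finite E5M2 value */
  _Float16 big[8] = {60000.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
  __m128h v = _mm_loadu_ph(big);
  unsigned char plain[16], sat[16];
  _mm_storeu_si128((__m128i *)plain, _mm_cvt2ph_bf8(v, v));
  _mm_storeu_si128((__m128i *)sat,   _mm_cvts_2ph_bf8(v, v));
  printf("plain[0]=%02x sat[0]=%02x\n", plain[0], sat[0]);
  return 0;
}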
+/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2BF8S instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x bf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvts_2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_bf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B)); + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), + (__v8hf)(__B)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( @@ -325,264 +1733,1523 @@ _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A, - __m256h __B) { - return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), - (__v16hf)(__B)); -} - +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { - return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W); +_mm256_cvt2ph_hf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), + (__v16hf)(__B)); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x hf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvt2ph_hf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W); +} + +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8 instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), + (__mmask32)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B)); +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. 
Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvts_2ph_hf8(__m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), + (__v8hf)(__B)); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvts_2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_hf8(__A, __B), (__v16qi)__W); } +/// Convert two 128-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 128-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// IF i < 8 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 8]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \param __B +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower 8 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvts_2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_hf8(__A, __B), (__v16qi)(__m128i)_mm_setzero_si128()); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { +_mm256_cvts_2ph_hf8(__m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A), - (__v16hf)(__B)); + (__v16hf)(__B)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. Resulting elements are saturated in case of overflow. +/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __W +/// A 256-bit vector of [32 x hf8]. +/// \param __U +/// A 32-bit merging mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts_2ph_hf8( + __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W); + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_hf8(__A, __B), (__v32qi)__W); } +/// Convert two 256-bit vectors, \a __A and \a __B, containing packed FP16 +/// floating-point elements to a 256-bit vector containing E4M3 FP8 elements. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. Resulting elements are saturated in case of overflow. 
+/// +/// \code{.operation} +/// FOR i := 0 to 31 +/// IF __U[i] +/// IF i < 16 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__B.fp16[i]) +/// ELSE +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i - 16]) +/// FI +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVT2PH2HF8S instruction. +/// +/// \param __U +/// A 32-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \param __B +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 256-bit vector of [32 x hf8]. Lower 16 elements correspond to the +/// (converted) elements from \a __B; higher order elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// zero is taken instead. static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvts_2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_hf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8(__m128i __A) { +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8(__m128h __W, - __mmask8 __U, - __m128i __A) { +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvthf8_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8(__mmask8 __U, - __m128i __A) { +/// Convert 128-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x hf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvthf8_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8(__m128i __A) { +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __A +/// A 256-bit vector of [32 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8_ph(__m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [32 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. 
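The VCVTHF82PH intrinsics above widen packed E4M3 bytes back to FP16 exactly. A short sketch of the unmasked 256-bit form (not part of the patch), assuming AVX10.2 support; 0x38 is the E4M3 bit pattern for 1.0 (exponent bias 7):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m128i hf8_bytes = _mm_set1_epi8(0x38);    /* sixteen E4M3 encodings of 1.0 */
  __m256h wide = _mm256_cvthf8_ph(hf8_bytes); /* exact hf8 -> fp16 widening */

  _Float16 tmp[16];
  _mm256_storeu_ph(tmp, wide);
  printf("%f\n", (double)tmp[0]); /* expect 1.000000 */
  return 0;
}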
static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_mask_cvthf8(__m256h __W, __mmask16 __U, __m128i __A) { +_mm256_mask_cvthf8_ph(__m256h __W, __mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_hf8_to_fp16(__A.hf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:256] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTHF82PH instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [32 x hf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvthf8(__mmask16 __U, __m128i __A) { +_mm256_maskz_cvthf8_ph(__mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_bf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) { +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts_ph_bf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { +_mm_mask_cvts_ph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) { +_mm_maskz_cvts_ph_bf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvts_ph_bf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := __W.bf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x bf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { +_mm256_mask_cvts_ph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Results are saturated. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.bf8[i] := convert_fp16_to_bf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.bf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2BF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x bf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) { +_mm256_maskz_cvts_ph_bf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E5M2 FP8 elements. Upper elements of +/// resulting vector are zeroed. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Merging mask \a __U is used to determine if +/// given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Zeroing mask \a __U is used to determine if +/// given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. 
+/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtph_hf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8 instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. 
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) { +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the (converted) +/// elements from \a __A; upper elements are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts_ph_hf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Merging mask \a __U is +/// used to determine if given element should be taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { +_mm_mask_cvts_ph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } +/// Convert 128-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Upper elements of +/// resulting vector are zeroed. Results are saturated. Zeroing mask \a __U is +/// used to determine if given element should be zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:64] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Lower elements correspond to the +/// (converted) elements from \a __A; upper elements are zeroed. If +/// corresponding mask bit is not set, then element is zeroed. 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) { +_mm_maskz_cvts_ph_hf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) { +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the (converted) +/// elements from \a __A. +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvts_ph_hf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. +/// Merging mask \a __U is used to determine if given element should be taken +/// from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := __W.hf8[i] +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __W +/// A 128-bit vector of [16 x hf8]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [8 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If +/// corresponding mask bit is not set, then element from \a __W is taken instead. static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { +_mm256_mask_cvts_ph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } +/// Convert 256-bit vector \a __A containing packed FP16 floating-point elements +/// to a 128-bit vector containing E4M3 FP8 elements. Results are saturated. +/// Zeroing mask \a __U is used to determine if given element should be zeroed +/// instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.hf8[i] := convert_fp16_to_hf8_saturate(__A.fp16[i]) +/// ELSE +/// dst.hf8[i] := 0 +/// FI +/// ENDFOR +/// +/// dst[MAX:128] := 0 +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic corresponds to the \c VCVTPH2HF8S instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [16 x fp16]. +/// \returns +/// A 128-bit vector of [16 x hf8]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, +/// then element is zeroed instead. 
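Per the comments above, the _s spellings (now _mm256_cvts_ph_hf8 and friends) differ from the plain converts only in saturating inputs that fall outside the hf8 range. A rough comparison sketch (not part of the patch), assuming AVX10.2 support; it only prints the produced encodings without asserting their exact values:

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  __m256h big = _mm256_set1_ph((_Float16)60000.0f); /* far above the hf8 range */
  __m128i sat = _mm256_cvts_ph_hf8(big);            /* saturating convert */
  __m128i plain = _mm256_cvtph_hf8(big);            /* plain convert, for comparison */

  uint8_t s[16], p[16];
  _mm_storeu_si128((__m128i *)s, sat);
  _mm_storeu_si128((__m128i *)p, plain);
  printf("saturating=0x%02x plain=0x%02x\n", s[0], p[0]);
  return 0;
}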
static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) { +_mm256_maskz_cvts_ph_hf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is exact. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8)); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __W +/// A 128-bit vector of [8 x fp16]. +/// \param __U +/// A 8-bit merging mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) { +_mm_mask_cvtbf8_ph(__m128h __W, __mmask8 __U, __m128i __A) { return _mm_castsi128_ph( - _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8)); + _mm_mask_slli_epi16((__m128i)__W, __U, _mm_cvtepi8_epi16(__A), 8)); } +/// Convert 128-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 128-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 7 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __U +/// A 8-bit zeroing mask. +/// \param __A +/// A 128-bit vector of [16 x bf8]. +/// \returns +/// A 128-bit vector of [8 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8)); } +/// Convert 256-bit vector \a __A, containing packed FP8 E4M3 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is exact. 
+/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __A +/// A 256-bit vector of [32 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) { return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8)); } +/// Convert 256-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Merging mask \a __U is used to determine if given element should be +/// taken from \a __W instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := __W.fp16[i] +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __W +/// A 256-bit vector of [16 x fp16]. +/// \param __U +/// A 16-bit merging mask. +/// \param __A +/// A 256-bit vector of [32 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// element from \a __W is taken instead. static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) { +_mm256_mask_cvtbf8_ph(__m256h __W, __mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( - _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8)); + _mm256_mask_slli_epi16((__m256i)__W, __U, _mm256_cvtepi8_epi16(__A), 8)); } +/// Convert 256-bit vector \a __A, containing packed FP8 E5M2 floating-point +/// elements to a 256-bit vector containing FP16 elements. The conversion is +/// exact. Zeroing mask \a __U is used to determine if given element should be +/// zeroed instead. +/// +/// \code{.operation} +/// FOR i := 0 to 15 +/// IF __U[i] +/// dst.fp16[i] := convert_bf8_to_fp16(__A.bf8[i]) +/// ELSE +/// dst.fp16[i] := 0 +/// FI +/// ENDFOR +/// \endcode +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a single instruction. +/// +/// \param __U +/// A 16-bit zeroing mask. +/// \param __A +/// A 256-bit vector of [32 x bf8]. +/// \returns +/// A 256-bit vector of [16 x fp16]. Resulting elements correspond to the +/// (converted) elements from \a __A. If corresponding mask bit is not set, then +/// zero is taken instead. 
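The cvtbf8_ph helpers above expand bf8 to FP16 without a dedicated instruction: each byte is sign-extended to 16 bits and shifted left by 8, which discards the extension and leaves the bf8 bits in the upper byte. That is exact because E5M2 shares FP16's 5-bit exponent and bias, so a bf8 byte followed by eight zero bits is already the IEEE half-precision encoding of the same value. A scalar sketch of the same idea (not part of the patch; no AVX needed, only compiler support for _Float16 is assumed):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void) {
  uint8_t bf8 = 0x3C;                 /* E5M2 encoding of 1.0 */
  uint16_t bits = (uint16_t)bf8 << 8; /* same effect as the vector slli-by-8 */

  _Float16 h;
  memcpy(&h, &bits, sizeof h);        /* reinterpret the bits as FP16 */
  printf("%f\n", (double)h);          /* prints 1.000000 */
  return 0;
}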
static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8)); } +// clang-format on + #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/lib/include/avx10_2minmaxintrin.h b/lib/include/avx10_2minmaxintrin.h index 8164d49d89..809a01b04f 100644 --- a/lib/include/avx10_2minmaxintrin.h +++ b/lib/include/avx10_2minmaxintrin.h @@ -66,34 +66,19 @@ (__v2df)_mm_setzero_pd(), (__mmask8)(U))) #define _mm256_minmax_pd(A, B, C) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + ((__m256d)__builtin_ia32_vminmaxpd256_mask( \ (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)-1, _MM_FROUND_NO_EXC)) + (__v4df)_mm256_setzero_pd(), (__mmask8)-1)) #define _mm256_mask_minmax_pd(W, U, A, B, C) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + ((__m256d)__builtin_ia32_vminmaxpd256_mask( \ (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)(__m256d)(W), (__mmask8)(U), _MM_FROUND_NO_EXC)) + (__v4df)(__m256d)(W), (__mmask8)(U))) #define _mm256_maskz_minmax_pd(U, A, B, C) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + ((__m256d)__builtin_ia32_vminmaxpd256_mask( \ (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)(U), _MM_FROUND_NO_EXC)) - -#define _mm256_minmax_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) + (__v4df)_mm256_setzero_pd(), (__mmask8)(U))) #define _mm_minmax_ph(A, B, C) \ ((__m128h)__builtin_ia32_vminmaxph128_mask( \ @@ -111,34 +96,19 @@ (__v8hf)_mm_setzero_ph(), (__mmask8)(U))) #define _mm256_minmax_ph(A, B, C) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + ((__m256h)__builtin_ia32_vminmaxph256_mask( \ (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ - (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_NO_EXC)) + (__v16hf)_mm256_setzero_ph(), (__mmask16)-1)) #define _mm256_mask_minmax_ph(W, U, A, B, C) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + ((__m256h)__builtin_ia32_vminmaxph256_mask( \ (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ - (__v16hf)(__m256h)(W), (__mmask16)(U), _MM_FROUND_NO_EXC)) + (__v16hf)(__m256h)(W), (__mmask16)(U))) #define _mm256_maskz_minmax_ph(U, A, B, C) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + ((__m256h)__builtin_ia32_vminmaxph256_mask( \ (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ - (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), _MM_FROUND_NO_EXC)) - -#define _mm256_minmax_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ - (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) - -#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (C), \ - 
(__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ - (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + (__v16hf)_mm256_setzero_ph(), (__mmask16)(U))) #define _mm_minmax_ps(A, B, C) \ ((__m128)__builtin_ia32_vminmaxps128_mask( \ @@ -156,34 +126,19 @@ (__v4sf)_mm_setzero_ps(), (__mmask8)(U))) #define _mm256_minmax_ps(A, B, C) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + ((__m256)__builtin_ia32_vminmaxps256_mask( \ (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, _MM_FROUND_NO_EXC)) + (__v8sf)_mm256_setzero_ps(), (__mmask8)-1)) #define _mm256_mask_minmax_ps(W, U, A, B, C) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + ((__m256)__builtin_ia32_vminmaxps256_mask( \ (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ - (__mmask8)(U), _MM_FROUND_NO_EXC)) + (__mmask8)(U))) #define _mm256_maskz_minmax_ps(U, A, B, C) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + ((__m256)__builtin_ia32_vminmaxps256_mask( \ (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), _MM_FROUND_NO_EXC)) - -#define _mm256_minmax_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) + (__v8sf)_mm256_setzero_ps(), (__mmask8)(U))) #define _mm_minmax_sd(A, B, C) \ ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ diff --git a/lib/include/avx10_2niintrin.h b/lib/include/avx10_2niintrin.h index c91a7b57c7..992be18f77 100644 --- a/lib/include/avx10_2niintrin.h +++ b/lib/include/avx10_2niintrin.h @@ -402,1672 +402,6 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32( (__v8si)_mm256_setzero_si256()); } -/* YMM Rounding */ -#define _mm256_add_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vaddpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_add_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_add_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_add_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vaddph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_add_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_add_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_add_round_ps(A, B, R) \ - 
((__m256)__builtin_ia32_vaddps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_add_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_add_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_cmp_round_pd_mask(A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cmp_round_pd_mask(U, A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cmp_round_ph_mask(A, B, P, R) \ - ((__mmask16)__builtin_ia32_vcmpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)-1, \ - (int)(R))) - -#define _mm256_mask_cmp_round_ph_mask(U, A, B, P, R) \ - ((__mmask16)__builtin_ia32_vcmpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cmp_round_ps_mask(A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cmp_round_ps_mask(U, A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepi32_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask( \ - (__v8si)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepi32_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask((__v8si)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi32_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask( \ - (__v8si)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepi32_ps(A, R) \ - ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundepi32_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask( \ - (__v8si)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi32_ps(U, A, R) \ - ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundpd_epi32(A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epi32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epi32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundpd_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask( \ - (__v4df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundpd_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask((__v4df)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define 
_mm256_maskz_cvt_roundpd_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask( \ - (__v4df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundpd_ps(A, R) \ - ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask( \ - (__v4df)(__m256d)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundpd_ps(W, U, A, R) \ - ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask( \ - (__v4df)(__m256d)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_ps(U, A, R) \ - ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask((__v4df)(__m256d)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundpd_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundpd_epu32(A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epu32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epu32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundpd_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundph_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask((__v8hf)(A), (__v8si)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_pd(A, R) \ - ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask( \ - (__v8hf)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundph_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask((__v8hf)(A), (__v4df)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask( \ - (__v8hf)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtx_roundph_ps(A, R) \ - ((__m256)__builtin_ia32_vcvtph2psx256_round_mask( \ - (__v8hf)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)(-1), (int)(R))) - 
-#define _mm256_mask_cvtx_roundph_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vcvtph2psx256_round_mask((__v8hf)(A), (__v8sf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtx_roundph_ps(U, A, R) \ - ((__m256)__builtin_ia32_vcvtph2psx256_round_mask( \ - (__v8hf)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask((__v8hf)(A), (__v4di)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask((__v8hf)(A), (__v8su)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask((__v8hf)(A), (__v4du)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epu16(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epu16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask((__v16hf)(A), (__v16hu)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epu16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cvt_roundph_epi16(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epi16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2w256_round_mask((__v16hf)(A), (__v16hi)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epi16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_pd(A, R) 
\ - ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ - (__v4sf)(__m128)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ - (__v4sf)(__m128)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ - (__v4sf)(__m128)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_ph(A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ - (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1)) - -/* FIXME: We may use these way in future. -#define _mm256_cvt_roundps_ph(A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ - (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1)) -#define _mm256_mask_cvt_roundps_ph(U, W, A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ - (__v8sf)(__m256)(A), (int)(I), (__v8hi)(__m128i)(U), (__mmask8)(W))) -#define _mm256_maskz_cvt_roundps_ph(W, A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ - (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(W))) */ - -#define _mm256_cvtx_roundps_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask( \ - (__v8sf)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvtx_roundps_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask((__v8sf)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtx_roundps_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask( \ - (__v8sf)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundps_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepi64_pd(A, R) \ - ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ - (__v4di)(__m256i)(A), 
(__v4df)_mm256_setzero_pd(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundepi64_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ - (__v4di)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi64_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ - (__v4di)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepi64_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask( \ - (__v4di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepi64_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask((__v4di)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi64_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask( \ - (__v4di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepi64_ps(A, R) \ - ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask( \ - (__v4di)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundepi64_ps(W, U, A, R) \ - ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask( \ - (__v4di)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi64_ps(U, A, R) \ - ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask((__v4di)(__m256i)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundpd_epi32(A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epi32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundpd_epi32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundpd_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundpd_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundpd_epu32(A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epu32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundpd_epu32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundpd_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define 
_mm256_maskz_cvtt_roundpd_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundph_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask((__v8hf)(A), (__v8si)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask((__v8hf)(A), (__v4di)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask((__v8hf)(A), (__v8su)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask((__v8hf)(A), (__v4du)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epu16(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epu16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)(W), (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epu16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundph_epi16(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epi16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2w256_round_mask((__v16hf)(A), (__v16hi)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epi16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) 
- -#define _mm256_mask_cvtt_roundps_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundps_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundps_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundps_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepu32_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask( \ - (__v8su)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepu32_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask((__v8su)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu32_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask( \ - (__v8su)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepu32_ps(A, R) \ - ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ - (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundepu32_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ - (__v8su)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu32_ps(U, A, R) \ - ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ - (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepu64_pd(A, R) \ - ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ - (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundepu64_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ - (__v4du)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu64_pd(U, A, R) \ - 
((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ - (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepu64_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask( \ - (__v4du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepu64_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask((__v4du)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu64_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask( \ - (__v4du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepu64_ps(A, R) \ - ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask( \ - (__v4du)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundepu64_ps(W, U, A, R) \ - ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask( \ - (__v4du)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu64_ps(U, A, R) \ - ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask((__v4du)(__m256i)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepu16_ph(A, R) \ - ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask( \ - (__v16hu)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundepu16_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask((__v16hu)(A), (__v16hf)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu16_ph(U, A, R) \ - ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask( \ - (__v16hu)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define _mm256_cvt_roundepi16_ph(A, R) \ - ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask( \ - (__v16hi)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundepi16_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask((__v16hi)(A), (__v16hf)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi16_ph(U, A, R) \ - ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask( \ - (__v16hi)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define _mm256_div_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vdivpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_div_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_div_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_div_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vdivph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_div_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_div_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_div_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vdivps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_div_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_div_round_ps((A), 
(B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_div_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_div_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_fcmadd_round_pch(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fcmadd_round_pch(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fcmadd_round_pch(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fcmadd_round_pch(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_maskz( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cmul_round_pch(A, B, R) \ - ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cmul_round_pch(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cmul_round_pch(U, A, B, R) \ - ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_fixupimm_round_pd(A, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vfixupimmpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), (int)(R))) - -#define _mm256_fixupimm_round_ps(A, B, C, imm, R) \ - ((__m256)__builtin_ia32_vfixupimmps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ - (int)(imm), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ - ((__m256)__builtin_ia32_vfixupimmps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ - ((__m256)__builtin_ia32_vfixupimmps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), (int)(R))) - -#define _mm256_fmadd_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_pd(A, B, C, U, R) \ - 
((__m256d)__builtin_ia32_vfmaddpd256_round_mask3( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsub_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsub_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsub_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmadd_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask3_fnmadd_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask3( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fnmadd_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmsub_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_maskz_fnmsub_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmadd_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmsub_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmsub_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fmsub_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fnmadd_round_ph(A, B, C, R) \ - 
((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask3_fnmadd_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask3( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fnmadd_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fnmsub_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_maskz_fnmsub_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmadd_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsub_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsub_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsub_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmadd_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask3_fnmadd_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask3( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fnmadd_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmsub_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_maskz_fnmsub_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmadd_round_pch(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), 
(__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_pch(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_pch(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_pch(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_maskz( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmaddsub_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmaddsub_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmaddsub_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask3( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmaddsub_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsubadd_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsubadd_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsubadd_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmaddsub_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmaddsub_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmaddsub_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fmaddsub_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmsubadd_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmsubadd_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fmsubadd_round_ph(U, A, B, C, R) \ - 
((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmaddsub_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmaddsub_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmaddsub_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmaddsub_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsubadd_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsubadd_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsubadd_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) -#define _mm256_mask3_fmsub_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmsubadd_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmsubaddpd256_round_mask3( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmadd_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmsub_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fnmsub_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmsub_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmsubph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmsubadd_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmsubaddph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask_fnmadd_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask_fnmsub_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fnmsub_round_ph(A, B, 
C, U, R) \ - ((__m256h)__builtin_ia32_vfmsubph256_round_mask3( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmsub_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmsubps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmsubadd_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmsubaddps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmadd_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmsub_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fnmsub_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmsubps256_round_mask3( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mul_round_pch(A, B, R) \ - ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_mul_round_pch(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_mul_round_pch(U, A, B, R) \ - ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_getexp_round_pd(A, R) \ - ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_getexp_round_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getexp_round_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_getexp_round_ph(A, R) \ - ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, \ - (int)(R))) - -#define _mm256_mask_getexp_round_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_getexp_round_ph(U, A, R) \ - ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_getexp_round_ps(A, R) \ - ((__m256)__builtin_ia32_vgetexpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_getexp_round_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vgetexpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getexp_round_ps(U, A, R) \ - ((__m256)__builtin_ia32_vgetexpps256_round_mask((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_getmant_round_pd(A, B, C, R) \ - 
((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), \ - (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_getmant_round_pd(W, U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), (__v4df)(__m256d)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getmant_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) - -#define _mm256_getmant_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ - (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) - -#define _mm256_mask_getmant_round_ph(W, U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_getmant_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ - (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define _mm256_getmant_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), \ - (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_getmant_round_ps(W, U, A, B, C, R) \ - ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getmant_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) - -#define _mm256_max_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vmaxpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_max_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_max_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_max_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vmaxph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_max_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_max_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_max_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vmaxps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_max_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_max_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_min_round_pd(A, B, R) \ - 
((__m256d)__builtin_ia32_vminpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_min_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_min_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_min_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vminph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_min_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_min_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_min_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vminps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_min_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_min_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_mul_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vmulpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_mul_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_mul_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_mul_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vmulph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_mul_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_mul_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_mul_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vmulps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_mul_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_mul_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_range_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_range_round_pd(W, U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_range_round_pd(U, A, B, C, R) \ - 
((__m256d)__builtin_ia32_vrangepd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) - -#define _mm256_range_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vrangeps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_range_round_ps(W, U, A, B, C, R) \ - ((__m256)__builtin_ia32_vrangeps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_range_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vrangeps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) - -#define _mm256_reduce_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_reduce_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_maskz_reduce_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_reduce_round_ph(W, U, A, imm, R) \ - ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_reduce_round_ph(U, A, imm, R) \ - ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_reduce_round_ph(A, imm, R) \ - ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_reduce_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vreduceps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_reduce_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_vreduceps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_maskz_reduce_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_vreduceps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_roundscale_round_pd(A, imm, R) \ - ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_undefined_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_roundscale_round_pd(A, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ - (__v4df)(__m256d)(C), (int)(imm), (__v4df)(__m256d)(A), (__mmask8)(B), \ - (int)(R))) - -#define _mm256_maskz_roundscale_round_pd(A, B, imm, R) \ - ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ - (__v4df)(__m256d)(B), (int)(imm), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(A), (int)(R))) - -#define _mm256_roundscale_round_ph(A, imm, R) \ - ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_roundscale_round_ph(A, B, C, imm, R) \ - ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ - 
(__v16hf)(__m256h)(C), (int)(imm), (__v16hf)(__m256h)(A), \ - (__mmask16)(B), (int)(R))) - -#define _mm256_maskz_roundscale_round_ph(A, B, imm, R) \ - ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ - (__v16hf)(__m256h)(B), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ - (__mmask16)(A), (int)(R))) - -#define _mm256_roundscale_round_ps(A, imm, R) \ - ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_undefined_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_roundscale_round_ps(A, B, C, imm, R) \ - ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ - (__v8sf)(__m256)(C), (int)(imm), (__v8sf)(__m256)(A), (__mmask8)(B), \ - (int)(R))) - -#define _mm256_maskz_roundscale_round_ps(A, B, imm, R) \ - ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ - (__v8sf)(__m256)(B), (int)(imm), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(A), (int)(R))) - -#define _mm256_scalef_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), \ - (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_scalef_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_scalef_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_scalef_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), \ - (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) - -#define _mm256_mask_scalef_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_scalef_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), \ - (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define _mm256_scalef_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vscalefps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_undefined_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_scalef_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_vscalefps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_scalef_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_vscalefps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_sqrt_round_pd(A, R) \ - ((__m256d)__builtin_ia32_vsqrtpd256_round((__v4df)(__m256d)(A), (int)(R))) - -#define _mm256_mask_sqrt_round_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_sqrt_round_pd(U, A, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_sqrt_round_ph(A, R) \ - ((__m256h)__builtin_ia32_vsqrtph256_round((__v16hf)(__m256h)(A), (int)(R))) - -#define _mm256_mask_sqrt_round_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)), \ - 
(__v16hf)(__m256h)(W))) - -#define _mm256_maskz_sqrt_round_ph(U, A, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_sqrt_round_ps(A, R) \ - ((__m256)__builtin_ia32_vsqrtps256_round((__v8sf)(__m256)(A), (int)(R))) - -#define _mm256_mask_sqrt_round_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_sqrt_round_ps((A), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_sqrt_round_ps(U, A, R) \ - ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_sqrt_round_ps((A), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_sub_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vsubpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_sub_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_sub_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_sub_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vsubph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_sub_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_sub_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_sub_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vsubps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_sub_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_sub_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS128 diff --git a/lib/include/avx10_2satcvtdsintrin.h b/lib/include/avx10_2satcvtdsintrin.h index 5902843631..cc840368c3 100644 --- a/lib/include/avx10_2satcvtdsintrin.h +++ b/lib/include/avx10_2satcvtdsintrin.h @@ -71,175 +71,134 @@ #endif /* __x86_64__ */ // 128 Bit : Double -> int -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epi32(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)__A, (__v4si)__W, __U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epi32(__mmask16 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epi32(__mmask16 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U)); } // 256 Bit : Double -> int static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epi32(__m256d __A) { - return 
((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( - (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_pd_epi32(__m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs256_mask( + (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m256d __A) { - return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( - (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_mask_cvtts_pd_epi32(__m128i __W, __mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs256_mask((__v4df)__A, (__v4si)__W, + __U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epi32(__mmask8 __U, __m256d __A) { - return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( - (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_pd_epi32(__mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs256_mask( + (__v4df)__A, (__v4si)_mm_setzero_si128(), __U)); } -#define _mm256_cvtts_roundpd_epi32(__A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \ - (__mmask8) - 1, (int)(__R))) - -#define _mm256_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundpd_epi32(__U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \ - (__mmask8)__U, (int)(__R))) - // 128 Bit : Double -> uint -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epu32(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epu32(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( (__v2df)__A, (__v4si)(__m128i)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epu32(__mmask8 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epu32(__mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U)); } // 256 Bit : Double -> uint static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epu32(__m256d __A) { - return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( - (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_pd_epu32(__m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs256_mask( + (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m256d __A) { - return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( - (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_mask_cvtts_pd_epu32(__m128i __W, __mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs256_mask((__v4df)__A, (__v4si)__W, + __U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epu32(__mmask8 __U, __m256d __A) { - return 
((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( - (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_pd_epu32(__mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs256_mask( + (__v4df)__A, (__v4si)_mm_setzero_si128(), __U)); } -#define _mm256_cvtts_roundpd_epu32(__A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \ - (__mmask8) - 1, (int)(__R))) - -#define _mm256_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundpd_epu32(__U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \ - (__mmask8)__U, (int)(__R))) - // 128 Bit : Double -> long -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epi64(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epi64(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epi64(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epi64(__mmask8 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epi64(__mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } // 256 Bit : Double -> long static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epi64(__m256d __A) { - return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( - (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_pd_epi64(__m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2qqs256_mask( + (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epi64(__m256i __W, __mmask8 __U, __m256d __A) { - return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( - (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_mask_cvtts_pd_epi64(__m256i __W, __mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2qqs256_mask((__v4df)__A, (__v4di)__W, + __U)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epi64(__mmask8 __U, __m256d __A) { - return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( - (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U, - _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_pd_epi64(__mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2qqs256_mask( + (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U)); } -#define _mm256_cvtts_roundpd_epi64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)__A, (__v4di)__W, \ - (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundpd_epi64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, 
(int)__R)) - // 128 Bit : Double -> ulong -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epu64(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epu64(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epu64(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epu64(__mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } @@ -247,105 +206,74 @@ _mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) { // 256 Bit : Double -> ulong static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epu64(__m256d __A) { - return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( - (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_pd_epu64(__m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_mask( + (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epu64(__m256i __W, __mmask8 __U, __m256d __A) { - return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( - (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_mask_cvtts_pd_epu64(__m256i __W, __mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_mask((__v4df)__A, (__v4di)__W, + __U)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epu64(__mmask8 __U, __m256d __A) { - return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( - (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U, - _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_pd_epu64(__mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_mask( + (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U)); } -#define _mm256_cvtts_roundpd_epu64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ - (__v4df)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundpd_epu64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R)) - // 128 Bit : float -> int -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epi32(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epi32(__m128i __W, __mmask8 __U, __m128 __A) { +_mm_mask_cvtts_ps_epi32(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)__A, (__v4si)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epi32(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epi32(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( 
(__v4sf)__A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)__U)); } // 256 Bit : float -> int static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epi32(__m256 __A) { - return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( - (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_ps_epi32(__m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2dqs256_mask( + (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttsps_epi32(__m256i __W, __mmask8 __U, __m256 __A) { - return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( - (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_mask_cvtts_ps_epi32(__m256i __W, __mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2dqs256_mask((__v8sf)__A, (__v8si)__W, + __U)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epi32(__mmask8 __U, __m256 __A) { - return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( - (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U, - _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_ps_epi32(__mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2dqs256_mask( + (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U)); } -#define _mm256_cvtts_roundps_epi32(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \ - (__mmask8) - 1, (int)(__R))) - -#define _mm256_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundps_epi32(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \ - (__mmask8)__U, (int)(__R))) - // 128 Bit : float -> uint -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epu32(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epu32(__m128i __W, __mmask8 __U, __m128 __A) { +_mm_mask_cvtts_ps_epu32(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)__A, (__v4si)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epu32(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( (__v4sf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U)); } @@ -353,144 +281,96 @@ _mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) { // 256 Bit : float -> uint static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epu32(__m256 __A) { - return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( - (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_ps_epu32(__m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2udqs256_mask( + (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttsps_epu32(__m256i __W, __mmask8 __U, __m256 __A) { - return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( - (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); 
+_mm256_mask_cvtts_ps_epu32(__m256i __W, __mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2udqs256_mask((__v8sf)__A, (__v8si)__W, + __U)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epu32(__mmask8 __U, __m256 __A) { - return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( - (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U, - _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_ps_epu32(__mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2udqs256_mask( + (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U)); } -#define _mm256_cvtts_roundps_epu32(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \ - (__mmask8) - 1, (int)(__R))) - -#define _mm256_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundps_epu32(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \ - (__mmask8)__U, (int)(__R))) - // 128 bit : float -> long -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epi64(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epi64(__m128i __W, __mmask8 __U, __m128 __A) { +_mm_mask_cvtts_ps_epi64(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epi64(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } // 256 bit : float -> long static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epi64(__m128 __A) { - return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( - (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_ps_epi64(__m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2qqs256_mask( + (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttsps_epi64(__m256i __W, __mmask8 __U, __m128 __A) { - return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( - (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_mask_cvtts_ps_epi64(__m256i __W, __mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2qqs256_mask((__v4sf)__A, (__v4di)__W, + __U)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) { - return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( - (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U, - _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_ps_epi64(__mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2qqs256_mask( + (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U)); } -#define _mm256_cvtts_roundps_epi64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \ - 
((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundps_epi64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, \ - (int)__R)) - // 128 bit : float -> ulong -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epu64(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epu64(__m128i __W, __mmask8 __U, __m128 __A) { +_mm_mask_cvtts_ps_epu64(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epu64(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } // 256 bit : float -> ulong static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epu64(__m128 __A) { - return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( - (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, - _MM_FROUND_CUR_DIRECTION)); +_mm256_cvtts_ps_epu64(__m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2uqqs256_mask( + (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttsps_epu64(__m256i __W, __mmask8 __U, __m128 __A) { - return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( - (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +_mm256_mask_cvtts_ps_epu64(__m256i __W, __mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2uqqs256_mask((__v4sf)__A, (__v4di)__W, + __U)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) { - return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( - (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U, - _MM_FROUND_CUR_DIRECTION)); +_mm256_maskz_cvtts_ps_epu64(__mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2uqqs256_mask( + (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U)); } -#define _mm256_cvtts_roundps_epu64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundps_epu64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, \ - (int)__R)) - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVX10_2SATCVTDSINTRIN_H diff --git a/lib/include/avx10_2satcvtintrin.h b/lib/include/avx10_2satcvtintrin.h index d16c60e638..2f1fad9eff 100644 --- a/lib/include/avx10_2satcvtintrin.h +++ b/lib/include/avx10_2satcvtintrin.h @@ -14,431 +14,299 @@ #ifndef __AVX10_2SATCVTINTRIN_H #define __AVX10_2SATCVTINTRIN_H -#define _mm_ipcvtbf16_epi8(A) \ +#define _mm_ipcvts_bf16_epi8(A) \ ((__m128i)__builtin_ia32_vcvtbf162ibs128((__v8bf)(__m128bh)(A))) -#define 
_mm_mask_ipcvtbf16_epi8(W, U, A) \ +#define _mm_mask_ipcvts_bf16_epi8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epi8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epi8(A), (__v8hi)(__m128i)(W))) -#define _mm_maskz_ipcvtbf16_epi8(U, A) \ +#define _mm_maskz_ipcvts_bf16_epi8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvtbf16_epi8(A), \ + (__v8hi)_mm_ipcvts_bf16_epi8(A), \ (__v8hi)_mm_setzero_si128())) -#define _mm256_ipcvtbf16_epi8(A) \ +#define _mm256_ipcvts_bf16_epi8(A) \ ((__m256i)__builtin_ia32_vcvtbf162ibs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvtbf16_epi8(W, U, A) \ +#define _mm256_mask_ipcvts_bf16_epi8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epi8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epi8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvtbf16_epi8(U, A) \ +#define _mm256_maskz_ipcvts_bf16_epi8(U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epi8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epi8(A), \ (__v16hi)_mm256_setzero_si256())) -#define _mm_ipcvtbf16_epu8(A) \ +#define _mm_ipcvts_bf16_epu8(A) \ ((__m128i)__builtin_ia32_vcvtbf162iubs128((__v8bf)(__m128bh)(A))) -#define _mm_mask_ipcvtbf16_epu8(W, U, A) \ +#define _mm_mask_ipcvts_bf16_epu8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epu8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epu8(A), (__v8hi)(__m128i)(W))) -#define _mm_maskz_ipcvtbf16_epu8(U, A) \ +#define _mm_maskz_ipcvts_bf16_epu8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvtbf16_epu8(A), \ + (__v8hi)_mm_ipcvts_bf16_epu8(A), \ (__v8hi)_mm_setzero_si128())) -#define _mm256_ipcvtbf16_epu8(A) \ +#define _mm256_ipcvts_bf16_epu8(A) \ ((__m256i)__builtin_ia32_vcvtbf162iubs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvtbf16_epu8(W, U, A) \ +#define _mm256_mask_ipcvts_bf16_epu8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epu8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epu8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvtbf16_epu8(U, A) \ +#define _mm256_maskz_ipcvts_bf16_epu8(U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epu8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epu8(A), \ (__v16hi)_mm256_setzero_si256())) -#define _mm_ipcvtph_epi8(A) \ +#define _mm_ipcvts_ph_epi8(A) \ ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtph_epi8(W, U, A) \ +#define _mm_mask_ipcvts_ph_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtph_epi8(U, A) \ +#define _mm_maskz_ipcvts_ph_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtph_epi8(A) \ +#define _mm256_ipcvts_ph_epi8(A) \ ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION)) + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1)) -#define _mm256_mask_ipcvtph_epi8(W, U, A) \ +#define _mm256_mask_ipcvts_ph_epi8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)(W), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION)) + 
(__v16hu)(W), (__mmask16)(U))) -#define _mm256_maskz_ipcvtph_epi8(U, A) \ +#define _mm256_maskz_ipcvts_ph_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ - (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + (__mmask16)(U))) -#define _mm256_ipcvt_roundph_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)_mm256_setzero_si256(), \ - (__mmask16)-1, (const int)R)) - -#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)_mm256_setzero_si256(), \ - (__mmask16)(U), (const int)R)) - -#define _mm_ipcvtph_epu8(A) \ +#define _mm_ipcvts_ph_epu8(A) \ ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtph_epu8(W, U, A) \ +#define _mm_mask_ipcvts_ph_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtph_epu8(U, A) \ +#define _mm_maskz_ipcvts_ph_epu8(U, A) \ ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtph_epu8(A) \ +#define _mm256_ipcvts_ph_epu8(A) \ ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION)) + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1)) -#define _mm256_mask_ipcvtph_epu8(W, U, A) \ +#define _mm256_mask_ipcvts_ph_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)(W), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v16hu)(W), (__mmask16)(U))) -#define _mm256_maskz_ipcvtph_epu8(U, A) \ +#define _mm256_maskz_ipcvts_ph_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ - (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + (__mmask16)(U))) -#define _mm256_ipcvt_roundph_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (const int)R)) - -#define _mm_ipcvtps_epi8(A) \ +#define _mm_ipcvts_ps_epi8(A) \ ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtps_epi8(W, U, A) \ +#define _mm_mask_ipcvts_ps_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtps_epi8(U, A) \ +#define _mm_maskz_ipcvts_ps_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtps_epi8(A) \ +#define _mm256_ipcvts_ps_epi8(A) \ ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION)) + 
(__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1)) -#define _mm256_mask_ipcvtps_epi8(W, U, A) \ +#define _mm256_mask_ipcvts_ps_epi8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)(W), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v8su)(W), (__mmask8)(U))) -#define _mm256_maskz_ipcvtps_epi8(U, A) \ +#define _mm256_maskz_ipcvts_ps_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U))) -#define _mm256_ipcvt_roundps_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)-1, (const int)R)) - -#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)(U), (const int)R)) - -#define _mm_ipcvtps_epu8(A) \ +#define _mm_ipcvts_ps_epu8(A) \ ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtps_epu8(W, U, A) \ +#define _mm_mask_ipcvts_ps_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtps_epu8(U, A) \ +#define _mm_maskz_ipcvts_ps_epu8(U, A) \ ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtps_epu8(A) \ +#define _mm256_ipcvts_ps_epu8(A) \ ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION)) + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1)) -#define _mm256_mask_ipcvtps_epu8(W, U, A) \ +#define _mm256_mask_ipcvts_ps_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ - (__v8su)(W), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v8su)(W), (__mmask8)(U))) -#define _mm256_maskz_ipcvtps_epu8(U, A) \ +#define _mm256_maskz_ipcvts_ps_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U))) -#define _mm256_ipcvt_roundps_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)-1, (const int)R)) - -#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)(U), (const int)R)) - -#define _mm_ipcvttbf16_epi8(A) \ +#define _mm_ipcvtts_bf16_epi8(A) \ ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A))) -#define _mm_mask_ipcvttbf16_epi8(W, U, A) \ +#define _mm_mask_ipcvtts_bf16_epi8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epi8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epi8(A), (__v8hi)(__m128i)(W))) -#define 
_mm_maskz_ipcvttbf16_epi8(U, A) \ +#define _mm_maskz_ipcvtts_bf16_epi8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvttbf16_epi8(A), \ + (__v8hi)_mm_ipcvtts_bf16_epi8(A), \ (__v8hi)_mm_setzero_si128())) -#define _mm256_ipcvttbf16_epi8(A) \ +#define _mm256_ipcvtts_bf16_epi8(A) \ ((__m256i)__builtin_ia32_vcvttbf162ibs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvttbf16_epi8(W, U, A) \ +#define _mm256_mask_ipcvtts_bf16_epi8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epi8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvttbf16_epi8(U, A) \ +#define _mm256_maskz_ipcvtts_bf16_epi8(U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epi8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \ (__v16hi)_mm256_setzero_si256())) -#define _mm_ipcvttbf16_epu8(A) \ +#define _mm_ipcvtts_bf16_epu8(A) \ ((__m128i)__builtin_ia32_vcvttbf162iubs128((__v8bf)(__m128bh)(A))) -#define _mm_mask_ipcvttbf16_epu8(W, U, A) \ +#define _mm_mask_ipcvtts_bf16_epu8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epu8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epu8(A), (__v8hi)(__m128i)(W))) -#define _mm_maskz_ipcvttbf16_epu8(U, A) \ +#define _mm_maskz_ipcvtts_bf16_epu8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvttbf16_epu8(A), \ + (__v8hi)_mm_ipcvtts_bf16_epu8(A), \ (__v8hi)_mm_setzero_si128())) -#define _mm256_ipcvttbf16_epu8(A) \ +#define _mm256_ipcvtts_bf16_epu8(A) \ ((__m256i)__builtin_ia32_vcvttbf162iubs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvttbf16_epu8(W, U, A) \ +#define _mm256_mask_ipcvtts_bf16_epu8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epu8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvttbf16_epu8(U, A) \ +#define _mm256_maskz_ipcvtts_bf16_epu8(U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epu8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \ (__v16hi)_mm256_setzero_si256())) -#define _mm_ipcvttph_epi8(A) \ +#define _mm_ipcvtts_ph_epi8(A) \ ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvttph_epi8(W, U, A) \ +#define _mm_mask_ipcvtts_ph_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvttph_epi8(U, A) \ +#define _mm_maskz_ipcvtts_ph_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvttph_epi8(A) \ +#define _mm256_ipcvtts_ph_epi8(A) \ ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION)) + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1)) -#define _mm256_mask_ipcvttph_epi8(W, U, A) \ +#define _mm256_mask_ipcvtts_ph_epi8(W, U, A) \ ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)(W), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v16hu)(W), (__mmask16)(U))) -#define _mm256_maskz_ipcvttph_epi8(U, A) \ +#define _mm256_maskz_ipcvtts_ph_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), 
\ - (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + (__mmask16)(U))) -#define _mm256_ipcvtt_roundph_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (const int)R)) - -#define _mm_ipcvttph_epu8(A) \ +#define _mm_ipcvtts_ph_epu8(A) \ ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvttph_epu8(W, U, A) \ +#define _mm_mask_ipcvtts_ph_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvttph_epu8(U, A) \ +#define _mm_maskz_ipcvtts_ph_epu8(U, A) \ ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvttph_epu8(A) \ +#define _mm256_ipcvtts_ph_epu8(A) \ ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - _MM_FROUND_CUR_DIRECTION)) + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1)) -#define _mm256_mask_ipcvttph_epu8(W, U, A) \ +#define _mm256_mask_ipcvtts_ph_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)(W), (__mmask16)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v16hu)(W), (__mmask16)(U))) -#define _mm256_maskz_ipcvttph_epu8(U, A) \ +#define _mm256_maskz_ipcvtts_ph_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ - (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + (__mmask16)(U))) -#define _mm256_ipcvtt_roundph_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (const int)R)) - -#define _mm_ipcvttps_epi8(A) \ +#define _mm_ipcvtts_ps_epi8(A) \ ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvttps_epi8(W, U, A) \ +#define _mm_mask_ipcvtts_ps_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvttps_epi8(U, A) \ +#define _mm_maskz_ipcvtts_ps_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvttps_epi8(A) \ +#define _mm256_ipcvtts_ps_epi8(A) \ ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION)) + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1)) -#define _mm256_mask_ipcvttps_epi8(W, U, A) \ +#define _mm256_mask_ipcvtts_ps_epi8(W, U, A) \ 
((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)(W), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v8su)(W), (__mmask8)(U))) -#define _mm256_maskz_ipcvttps_epi8(U, A) \ +#define _mm256_maskz_ipcvtts_ps_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U))) -#define _mm256_ipcvtt_roundps_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)-1, (const int)R)) - -#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)(U), (const int)R)) - -#define _mm_ipcvttps_epu8(A) \ +#define _mm_ipcvtts_ps_epu8(A) \ ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvttps_epu8(W, U, A) \ +#define _mm_mask_ipcvtts_ps_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvttps_epu8(U, A) \ +#define _mm_maskz_ipcvtts_ps_epu8(U, A) \ ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvttps_epu8(A) \ +#define _mm256_ipcvtts_ps_epu8(A) \ ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - _MM_FROUND_CUR_DIRECTION)) + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1)) -#define _mm256_mask_ipcvttps_epu8(W, U, A) \ +#define _mm256_mask_ipcvtts_ps_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \ - (__v8su)(W), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) + (__v8su)(W), (__mmask8)(U))) -#define _mm256_maskz_ipcvttps_epu8(U, A) \ +#define _mm256_maskz_ipcvtts_ps_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ - _MM_FROUND_CUR_DIRECTION)) - -#define _mm256_ipcvtt_roundps_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ - (const int)R)) + (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U))) #endif // __AVX10_2SATCVTINTRIN_H diff --git a/lib/include/avx512fp16intrin.h b/lib/include/avx512fp16intrin.h index e136aa14a1..92df320b45 100644 --- a/lib/include/avx512fp16intrin.h +++ b/lib/include/avx512fp16intrin.h @@ -553,7 +553,8 @@ static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) { } static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) { - return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f)); + return (__m512h)_mm512_xor_epi32((__m512i)__A, + 
_mm512_set1_epi32(-2147483648)); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 diff --git a/lib/include/bmiintrin.h b/lib/include/bmiintrin.h index 59c5ece397..8024da5537 100644 --- a/lib/include/bmiintrin.h +++ b/lib/include/bmiintrin.h @@ -161,8 +161,6 @@ _mm_tzcnt_64(unsigned long long __X) { #undef __RELAXED_FN_ATTRS -#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) - /* Define the default attributes for the functions in this file. */ #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ @@ -603,6 +601,4 @@ __blsr_u64(unsigned long long __X) { #undef __DEFAULT_FN_ATTRS -#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) */ - #endif /* __BMIINTRIN_H */ diff --git a/lib/include/cpuid.h b/lib/include/cpuid.h index 2601aa5724..52addb7bfa 100644 --- a/lib/include/cpuid.h +++ b/lib/include/cpuid.h @@ -267,18 +267,18 @@ : "0"(__leaf), "2"(__count)) #else /* x86-64 uses %rbx as the base register, so preserve it. */ -#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ - __asm(" xchgq %%rbx,%q1\n" \ - " cpuid\n" \ - " xchgq %%rbx,%q1" \ - : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ +#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ + __asm(" xchg{q|} {%%|}rbx,%q1\n" \ + " cpuid\n" \ + " xchg{q|} {%%|}rbx,%q1" \ + : "=a"(__eax), "=r"(__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf)) -#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \ - __asm(" xchgq %%rbx,%q1\n" \ - " cpuid\n" \ - " xchgq %%rbx,%q1" \ - : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ +#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \ + __asm(" xchg{q|} {%%|}rbx,%q1\n" \ + " cpuid\n" \ + " xchg{q|} {%%|}rbx,%q1" \ + : "=a"(__eax), "=r"(__ebx), "=c"(__ecx), "=d"(__edx) \ : "0"(__leaf), "2"(__count)) #endif @@ -289,20 +289,22 @@ static __inline unsigned int __get_cpuid_max (unsigned int __leaf, #ifdef __i386__ int __cpuid_supported; - __asm(" pushfl\n" - " popl %%eax\n" - " movl %%eax,%%ecx\n" - " xorl $0x00200000,%%eax\n" - " pushl %%eax\n" - " popfl\n" - " pushfl\n" - " popl %%eax\n" - " movl $0,%0\n" - " cmpl %%eax,%%ecx\n" + __asm(" pushf{l|d}\n" + " pop{l|} {%%|}eax\n" + " mov{l|} {%%eax,%%ecx|ecx,eax}\n" + " xor{l|} {$0x00200000,%%eax|eax,0x00200000}\n" + " push{l|} {%%|}eax\n" + " popf{l|d}\n" + " pushf{l|d}\n" + " pop{l|} {%%|}eax\n" + " mov{l|} {$0,%0|%0,0}\n" + " cmp{l|} {%%eax,%%ecx|ecx,eax}\n" " je 1f\n" - " movl $1,%0\n" + " mov{l|} {$1,%0|%0,1}\n" "1:" - : "=r" (__cpuid_supported) : : "eax", "ecx"); + : "=r"(__cpuid_supported) + : + : "eax", "ecx"); if (!__cpuid_supported) return 0; #endif diff --git a/lib/include/float.h b/lib/include/float.h index e5c439a9d4..84551af473 100644 --- a/lib/include/float.h +++ b/lib/include/float.h @@ -18,21 +18,12 @@ * additional definitions provided for Windows. * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx * - * Also fall back on Darwin and AIX to allow additional definitions and + * Also fall back on AIX to allow additional definitions and * implementation-defined values. */ -#if (defined(__APPLE__) || defined(__MINGW32__) || defined(_MSC_VER) || \ - defined(_AIX)) && \ +#if (defined(__MINGW32__) || defined(_MSC_VER) || defined(_AIX)) && \ __STDC_HOSTED__ && __has_include_next() -/* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level - * of #include_next to keep Metrowerks compilers happy. Avoid this - * extra indirection. 
- */ -#ifdef __APPLE__ -#define _FLOAT_H_ -#endif - # include_next /* Undefine anything that we'll be redefining below. */ diff --git a/lib/include/immintrin.h b/lib/include/immintrin.h index 19c5987257..35f012cc70 100644 --- a/lib/include/immintrin.h +++ b/lib/include/immintrin.h @@ -16,231 +16,112 @@ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__SSE4_2__) || defined(__SSE4_1__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AES__) || defined(__PCLMUL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__) #include -#endif -/* No feature check desired due to internal checks */ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VNNI__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512BITALG__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512BW__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512CD__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512DQ__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || 
\ - (defined(__AVX512IFMA__) && defined(__AVX512VL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VBMI__) && defined(__AVX512VL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512FP16__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512BF16__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__) /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). /// /// \headerfile @@ -252,9 +133,7 @@ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __ _rdpid_u32(void) { return __builtin_ia32_rdpid(); } -#endif // __RDPID__ -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__) /// Returns a 16-bit hardware-generated random value. /// /// \headerfile @@ -314,9 +193,7 @@ _rdrand64_step(unsigned long long *__p) } #endif } -#endif /* __RDRND__ */ -#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__) #ifdef __x86_64__ /// Reads the FS base register. /// @@ -427,9 +304,6 @@ _writegsbase_u64(unsigned long long __V) } #endif -#endif /* __FSGSBASE__ */ - -#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__) /* The structs used below are to force the load/store to be unaligned. This * is accomplished with the __packed__ attribute. 
The __may_alias__ prevents @@ -543,172 +417,86 @@ _storebe_i64(void * __P, long long __D) { ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); } #endif -#endif /* __MOVBE */ -#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__) #include #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__) #include -#endif /* No feature check desired due to internal MSC_VER checks */ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__) #include -#endif /* Intrinsics inside adcintrin.h are available at all times. */ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \ - defined(__MOVDIR64B__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVRS__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX10_2__) && defined(__MOVRS__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX10_2_512__) && defined(__MOVRS__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \ - defined(__WIDEKL__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \ - defined(__AMX_INT8__) || defined(__AMX_BF16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP8__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TRANSPOSE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_MOVRS__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_MOVRS__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TF32__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_TF32__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_BF16__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || 
__has_feature(modules) || \ - (defined(__AMX_FP16__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_COMPLEX__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - defined(__AVX512VP2INTERSECT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) #include #include #include @@ -716,33 +504,21 @@ _storebe_i64(void * __P, long long __D) { #include #include #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__) #include #include #include #include #include #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX10_2_512__) && defined(__SM4__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__) #include -#endif #if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ diff --git a/lib/include/intrin.h b/lib/include/intrin.h index 376046aeea..588c283cbd 100644 --- a/lib/include/intrin.h +++ b/lib/include/intrin.h @@ -162,8 +162,6 @@ void _Store_HLERelease(long volatile *, long); void _Store64_HLERelease(__int64 volatile *, __int64); void _StorePointer_HLERelease(void *volatile *, void *); void _WriteBarrier(void); -unsigned __int32 xbegin(void); -void _xend(void); /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. 
*/ #if defined(__x86_64__) && !defined(__arm64ec__) @@ -372,10 +370,29 @@ static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { \*----------------------------------------------------------------------------*/ #if defined(__aarch64__) || defined(__arm64ec__) unsigned __int64 __getReg(int); -long _InterlockedAdd(long volatile *Addend, long Value); -__int64 _InterlockedAdd64(__int64 volatile *Addend, __int64 Value); +unsigned char _interlockedbittestandreset_acq(long volatile *, long); +unsigned char _interlockedbittestandreset_nf(long volatile *, long); +unsigned char _interlockedbittestandreset_rel(long volatile *, long); +unsigned char _interlockedbittestandreset64_acq(__int64 volatile *, __int64); +unsigned char _interlockedbittestandreset64_nf(__int64 volatile *, __int64); +unsigned char _interlockedbittestandreset64_rel(__int64 volatile *, __int64); +unsigned char _interlockedbittestandset_acq(long volatile *, long); +unsigned char _interlockedbittestandset_nf(long volatile *, long); +unsigned char _interlockedbittestandset_rel(long volatile *, long); +unsigned char _interlockedbittestandset64_acq(__int64 volatile *, __int64); +unsigned char _interlockedbittestandset64_nf(__int64 volatile *, __int64); +unsigned char _interlockedbittestandset64_rel(__int64 volatile *, __int64); +long _InterlockedAdd(long volatile *, long); +long _InterlockedAdd_acq(long volatile *, long); +long _InterlockedAdd_nf(long volatile *, long); +long _InterlockedAdd_rel(long volatile *, long); +__int64 _InterlockedAdd64(__int64 volatile *, __int64); +__int64 _InterlockedAdd64_acq(__int64 volatile *, __int64); +__int64 _InterlockedAdd64_nf(__int64 volatile *, __int64); +__int64 _InterlockedAdd64_rel(__int64 volatile *, __int64); __int64 _ReadStatusReg(int); void _WriteStatusReg(int, __int64); +unsigned int __sys(int, __int64); unsigned short __cdecl _byteswap_ushort(unsigned short val); unsigned long __cdecl _byteswap_ulong (unsigned long val); diff --git a/lib/include/keylockerintrin.h b/lib/include/keylockerintrin.h index f76e91b4d4..4e9e6bec20 100644 --- a/lib/include/keylockerintrin.h +++ b/lib/include/keylockerintrin.h @@ -28,8 +28,6 @@ #ifndef _KEYLOCKERINTRIN_H #define _KEYLOCKERINTRIN_H -#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) - /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl"),\ @@ -326,10 +324,6 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { #undef __DEFAULT_FN_ATTRS -#endif /* !defined(__SCE__ || __has_feature(modules) || defined(__KL__) */ - -#if !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__) - /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ @@ -521,7 +515,4 @@ _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* #undef __DEFAULT_FN_ATTRS -#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__) \ - */ - #endif /* _KEYLOCKERINTRIN_H */ diff --git a/lib/include/llvm_libc_wrappers/stdlib.h b/lib/include/llvm_libc_wrappers/stdlib.h index 69afdf4a68..1da22abd0b 100644 --- a/lib/include/llvm_libc_wrappers/stdlib.h +++ b/lib/include/llvm_libc_wrappers/stdlib.h @@ -25,7 +25,7 @@ // The LLVM C library uses these named types so we forward declare them. 
typedef void (*__atexithandler_t)(void); -typedef int (*__bsearchcompare_t)(const void *, const void *); +typedef int (*__search_compare_t)(const void *, const void *); typedef int (*__qsortcompare_t)(const void *, const void *); typedef int (*__qsortrcompare_t)(const void *, const void *, void *); diff --git a/lib/include/lzcntintrin.h b/lib/include/lzcntintrin.h index 27509021ec..123a42a888 100644 --- a/lib/include/lzcntintrin.h +++ b/lib/include/lzcntintrin.h @@ -14,13 +14,15 @@ #ifndef __LZCNTINTRIN_H #define __LZCNTINTRIN_H -/* Define the default attributes for the functions in this file. */ +/* Define the default attributes for the functions in this file. + Allow using the lzcnt intrinsics even for non-LZCNT targets. Since the LZCNT + intrinsics are mapped to llvm.ctlz.*, false, which can be lowered to BSR on + non-LZCNT targets with zero-value input handled correctly. */ #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr + __attribute__((__always_inline__, __nodebug__)) constexpr #else -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #endif #ifndef _MSC_VER diff --git a/lib/include/module.modulemap b/lib/include/module.modulemap index dcaf09e8f2..a72828625a 100644 --- a/lib/include/module.modulemap +++ b/lib/include/module.modulemap @@ -35,6 +35,14 @@ module _Builtin_intrinsics [system] [extern_c] { } } + explicit module arm64 { + requires arm64 + requires windows + + header "arm64intr.h" + export * + } + explicit module intel { requires x86 export * @@ -231,6 +239,11 @@ module _Builtin_stdbool [system] { export * } +module _Builtin_stdcountof [system] { + header "stdcountof.h" + export * +} + module _Builtin_stddef [system] { textual header "stddef.h" diff --git a/lib/include/movrsintrin.h b/lib/include/movrsintrin.h index 250f4004cd..94510485b2 100644 --- a/lib/include/movrsintrin.h +++ b/lib/include/movrsintrin.h @@ -56,4 +56,4 @@ _m_prefetchrs(volatile const void *__P) { } #undef __DEFAULT_FN_ATTRS -#endif // __MOVRSINTRIN_H \ No newline at end of file +#endif // __MOVRSINTRIN_H diff --git a/lib/include/prfchwintrin.h b/lib/include/prfchwintrin.h index eaea5f3cf8..8ec55d7073 100644 --- a/lib/include/prfchwintrin.h +++ b/lib/include/prfchwintrin.h @@ -14,6 +14,10 @@ #ifndef __PRFCHWINTRIN_H #define __PRFCHWINTRIN_H +#if defined(__cplusplus) +extern "C" { +#endif + /// Loads a memory sequence containing the specified memory address into /// all data cache levels. /// @@ -26,11 +30,7 @@ /// /// \param __P /// A pointer specifying the memory address to be prefetched. -static __inline__ void __attribute__((__always_inline__, __nodebug__)) -_m_prefetch(void *__P) -{ - __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */); -} +void _m_prefetch(void *__P); /// Loads a memory sequence containing the specified memory address into /// the L1 data cache and sets the cache-coherency state to modified. @@ -48,13 +48,10 @@ _m_prefetch(void *__P) /// /// \param __P /// A pointer specifying the memory address to be prefetched. 
-static __inline__ void __attribute__((__always_inline__, __nodebug__)) -_m_prefetchw(volatile const void *__P) -{ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wcast-qual" - __builtin_prefetch ((const void*)__P, 1, 3 /* _MM_HINT_T0 */); -#pragma clang diagnostic pop -} +void _m_prefetchw(volatile const void *__P); + +#if defined(__cplusplus) +} // extern "C" +#endif #endif /* __PRFCHWINTRIN_H */ diff --git a/lib/include/ptrauth.h b/lib/include/ptrauth.h index d489a67c53..7f7d387cbd 100644 --- a/lib/include/ptrauth.h +++ b/lib/include/ptrauth.h @@ -42,6 +42,19 @@ typedef enum { The extra data is always 0. */ ptrauth_key_cxx_vtable_pointer = ptrauth_key_process_independent_data, + /* The key used to sign metadata pointers to Objective-C method-lists. */ + ptrauth_key_method_list_pointer = ptrauth_key_asda, + + /* The key used to sign Objective-C isa and super pointers. */ + ptrauth_key_objc_isa_pointer = ptrauth_key_process_independent_data, + ptrauth_key_objc_super_pointer = ptrauth_key_process_independent_data, + + /* The key used to sign selector pointers */ + ptrauth_key_objc_sel_pointer = ptrauth_key_process_dependent_data, + + /* The key used to sign Objective-C class_ro_t pointers. */ + ptrauth_key_objc_class_ro_pointer = ptrauth_key_process_independent_data, + /* The key used to sign pointers in ELF .init_array/.fini_array. */ ptrauth_key_init_fini_pointer = ptrauth_key_process_independent_code, @@ -259,6 +272,46 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; /* The value is ptrauth_string_discriminator("init_fini") */ #define __ptrauth_init_fini_discriminator 0xd9d4 +/* Objective-C pointer auth ABI qualifiers */ +#define __ptrauth_objc_method_list_imp \ + __ptrauth(ptrauth_key_function_pointer, 1, 0) + +#if __has_feature(ptrauth_objc_method_list_pointer) +#define __ptrauth_objc_method_list_pointer \ + __ptrauth(ptrauth_key_method_list_pointer, 1, 0xC310) +#else +#define __ptrauth_objc_method_list_pointer +#endif + +#define __ptrauth_isa_discriminator 0x6AE1 +#define __ptrauth_super_discriminator 0xB5AB +#define __ptrauth_objc_isa_pointer \ + __ptrauth(ptrauth_key_objc_isa_pointer, 1, __ptrauth_isa_discriminator) +#if __has_feature(ptrauth_restricted_intptr_qualifier) +#define __ptrauth_objc_isa_uintptr \ + __ptrauth_restricted_intptr(ptrauth_key_objc_isa_pointer, 1, \ + __ptrauth_isa_discriminator) +#else +#define __ptrauth_objc_isa_uintptr \ + __ptrauth(ptrauth_key_objc_isa_pointer, 1, __ptrauth_isa_discriminator) +#endif + +#define __ptrauth_objc_super_pointer \ + __ptrauth(ptrauth_key_objc_super_pointer, 1, __ptrauth_super_discriminator) + +#define __ptrauth_objc_sel_discriminator 0x57c2 +#if __has_feature(ptrauth_objc_interface_sel) +#define __ptrauth_objc_sel \ + __ptrauth(ptrauth_key_objc_sel_pointer, 1, __ptrauth_objc_sel_discriminator) +#else +#define __ptrauth_objc_sel +#endif + +#define __ptrauth_objc_class_ro_discriminator 0x61f8 +#define __ptrauth_objc_class_ro \ + __ptrauth(ptrauth_key_objc_class_ro_pointer, 1, \ + __ptrauth_objc_class_ro_discriminator) + #else #define ptrauth_strip(__value, __key) \ @@ -331,6 +384,10 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; #define ptrauth_cxx_vtable_pointer(key, address_discrimination, \ extra_discrimination...) 
+#define __ptrauth_objc_isa_pointer +#define __ptrauth_objc_isa_uintptr +#define __ptrauth_objc_super_pointer + #endif /* __has_feature(ptrauth_intrinsics) */ #endif /* __PTRAUTH_H */ diff --git a/lib/include/riscv_corev_alu.h b/lib/include/riscv_corev_alu.h index d2832ddf72..84f4d087e4 100644 --- a/lib/include/riscv_corev_alu.h +++ b/lib/include/riscv_corev_alu.h @@ -24,13 +24,13 @@ static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_abs(long a) { return __builtin_abs(a); } -static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_slet(long a, long b) { - return __builtin_riscv_cv_alu_slet(a, b); +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_sle(long a, long b) { + return __builtin_riscv_cv_alu_sle(a, b); } static __inline__ long __DEFAULT_FN_ATTRS -__riscv_cv_alu_sletu(unsigned long a, unsigned long b) { - return __builtin_riscv_cv_alu_sletu(a, b); +__riscv_cv_alu_sleu(unsigned long a, unsigned long b) { + return __builtin_riscv_cv_alu_sleu(a, b); } static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_min(long a, long b) { diff --git a/lib/include/riscv_vector.h b/lib/include/riscv_vector.h index 0560e82a85..f94f95800d 100644 --- a/lib/include/riscv_vector.h +++ b/lib/include/riscv_vector.h @@ -49,7 +49,6 @@ enum __RISCV_FRM { #define __riscv_vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2) #define __riscv_vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3) -#if __riscv_v_elen >= 64 #define __riscv_vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5) #define __riscv_vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6) #define __riscv_vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7) @@ -58,7 +57,6 @@ enum __RISCV_FRM { #define __riscv_vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1) #define __riscv_vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2) #define __riscv_vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3) -#endif #define __riscv_vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6) #define __riscv_vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7) @@ -78,7 +76,6 @@ enum __RISCV_FRM { #define __riscv_vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2) #define __riscv_vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3) -#if __riscv_v_elen >= 64 #define __riscv_vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5) #define __riscv_vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6) #define __riscv_vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7) @@ -87,7 +84,6 @@ enum __RISCV_FRM { #define __riscv_vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1) #define __riscv_vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2) #define __riscv_vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3) -#endif enum __RISCV_VXRM { diff --git a/lib/include/shaintrin.h b/lib/include/shaintrin.h index 232e1fa298..e21d3bded7 100644 --- a/lib/include/shaintrin.h +++ b/lib/include/shaintrin.h @@ -47,8 +47,9 @@ /// An immediate value where bits [1:0] select among four possible /// combining functions and rounding constants (not specified here). /// \returns A 128-bit vector of [4 x i32] containing the updated SHA-1 state. 
-#define _mm_sha1rnds4_epu32(V1, V2, M) \ - __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)) +#define _mm_sha1rnds4_epu32(V1, V2, M) \ + ((__m128i)__builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), \ + (__v4si)(__m128i)(V2), (M))) /// Calculates the SHA-1 state variable E from the SHA-1 state variables in /// the 128-bit vector of [4 x i32] in \a __X, adds that to the next set of diff --git a/lib/include/stdcountof.h b/lib/include/stdcountof.h new file mode 100644 index 0000000000..5714e6d6ff --- /dev/null +++ b/lib/include/stdcountof.h @@ -0,0 +1,15 @@ +/*===---- stdcountof.h - Standard header for countof -----------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDCOUNTOF_H +#define __STDCOUNTOF_H + +#define countof _Countof + +#endif /* __STDCOUNTOF_H */ diff --git a/lib/include/stdint.h b/lib/include/stdint.h index 01feab7b1e..96c2ccace1 100644 --- a/lib/include/stdint.h +++ b/lib/include/stdint.h @@ -317,166 +317,55 @@ typedef __UINTMAX_TYPE__ uintmax_t; * integer width that the target implements, so corresponding macros are * defined below, too. * - * These macros are defined using the same successive-shrinking approach as - * the type definitions above. It is likewise important that macros are defined - * in order of decending width. - * * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). */ -#define __int_c_join(a, b) a ## b -#define __int_c(v, suffix) __int_c_join(v, suffix) -#define __uint_c(v, suffix) __int_c_join(v##U, suffix) - - -#ifdef __INT64_TYPE__ -# undef __int64_c_suffix -# undef __int32_c_suffix -# undef __int16_c_suffix -# undef __int8_c_suffix -# ifdef __INT64_C_SUFFIX__ -# define __int64_c_suffix __INT64_C_SUFFIX__ -# define __int32_c_suffix __INT64_C_SUFFIX__ -# define __int16_c_suffix __INT64_C_SUFFIX__ -# define __int8_c_suffix __INT64_C_SUFFIX__ -# endif /* __INT64_C_SUFFIX__ */ -#endif /* __INT64_TYPE__ */ - #ifdef __int_least64_t -# ifdef __int64_c_suffix -# define INT64_C(v) __int_c(v, __int64_c_suffix) -# define UINT64_C(v) __uint_c(v, __int64_c_suffix) -# else -# define INT64_C(v) v -# define UINT64_C(v) v ## U -# endif /* __int64_c_suffix */ +#define INT64_C(v) __INT64_C(v) +#define UINT64_C(v) __UINT64_C(v) #endif /* __int_least64_t */ #ifdef __INT56_TYPE__ -# undef __int32_c_suffix -# undef __int16_c_suffix -# undef __int8_c_suffix -# ifdef __INT56_C_SUFFIX__ -# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__) -# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__) -# define __int32_c_suffix __INT56_C_SUFFIX__ -# define __int16_c_suffix __INT56_C_SUFFIX__ -# define __int8_c_suffix __INT56_C_SUFFIX__ -# else -# define INT56_C(v) v -# define UINT56_C(v) v ## U -# endif /* __INT56_C_SUFFIX__ */ +#define INT56_C(v) __INT56_C(v) +#define UINT56_C(v) __UINT56_C(v) #endif /* __INT56_TYPE__ */ #ifdef __INT48_TYPE__ -# undef __int32_c_suffix -# undef __int16_c_suffix -# undef __int8_c_suffix -# ifdef __INT48_C_SUFFIX__ -# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__) -# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__) -# define __int32_c_suffix __INT48_C_SUFFIX__ -# define __int16_c_suffix __INT48_C_SUFFIX__ -# define __int8_c_suffix __INT48_C_SUFFIX__ -# else -# define 
INT48_C(v) v -# define UINT48_C(v) v ## U -# endif /* __INT48_C_SUFFIX__ */ +#define INT48_C(v) __INT48_C(v) +#define UINT48_C(v) __UINT48_C(v) #endif /* __INT48_TYPE__ */ #ifdef __INT40_TYPE__ -# undef __int32_c_suffix -# undef __int16_c_suffix -# undef __int8_c_suffix -# ifdef __INT40_C_SUFFIX__ -# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__) -# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__) -# define __int32_c_suffix __INT40_C_SUFFIX__ -# define __int16_c_suffix __INT40_C_SUFFIX__ -# define __int8_c_suffix __INT40_C_SUFFIX__ -# else -# define INT40_C(v) v -# define UINT40_C(v) v ## U -# endif /* __INT40_C_SUFFIX__ */ +#define INT40_C(v) __INT40_C(v) +#define UINT40_C(v) __UINT40_C(v) #endif /* __INT40_TYPE__ */ -#ifdef __INT32_TYPE__ -# undef __int32_c_suffix -# undef __int16_c_suffix -# undef __int8_c_suffix -# ifdef __INT32_C_SUFFIX__ -# define __int32_c_suffix __INT32_C_SUFFIX__ -# define __int16_c_suffix __INT32_C_SUFFIX__ -# define __int8_c_suffix __INT32_C_SUFFIX__ -# endif /* __INT32_C_SUFFIX__ */ -#endif /* __INT32_TYPE__ */ - #ifdef __int_least32_t -# ifdef __int32_c_suffix -# define INT32_C(v) __int_c(v, __int32_c_suffix) -# define UINT32_C(v) __uint_c(v, __int32_c_suffix) -# else -# define INT32_C(v) v -# define UINT32_C(v) v ## U -# endif /* __int32_c_suffix */ +#define INT32_C(v) __INT32_C(v) +#define UINT32_C(v) __UINT32_C(v) #endif /* __int_least32_t */ #ifdef __INT24_TYPE__ -# undef __int16_c_suffix -# undef __int8_c_suffix -# ifdef __INT24_C_SUFFIX__ -# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__) -# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__) -# define __int16_c_suffix __INT24_C_SUFFIX__ -# define __int8_c_suffix __INT24_C_SUFFIX__ -# else -# define INT24_C(v) v -# define UINT24_C(v) v ## U -# endif /* __INT24_C_SUFFIX__ */ +#define INT24_C(v) __INT24_C(v) +#define UINT24_C(v) __UINT24_C(v) #endif /* __INT24_TYPE__ */ -#ifdef __INT16_TYPE__ -# undef __int16_c_suffix -# undef __int8_c_suffix -# ifdef __INT16_C_SUFFIX__ -# define __int16_c_suffix __INT16_C_SUFFIX__ -# define __int8_c_suffix __INT16_C_SUFFIX__ -# endif /* __INT16_C_SUFFIX__ */ -#endif /* __INT16_TYPE__ */ - #ifdef __int_least16_t -# ifdef __int16_c_suffix -# define INT16_C(v) __int_c(v, __int16_c_suffix) -# define UINT16_C(v) __uint_c(v, __int16_c_suffix) -# else -# define INT16_C(v) v -# define UINT16_C(v) v ## U -# endif /* __int16_c_suffix */ +#define INT16_C(v) __INT16_C(v) +#define UINT16_C(v) __UINT16_C(v) #endif /* __int_least16_t */ -#ifdef __INT8_TYPE__ -# undef __int8_c_suffix -# ifdef __INT8_C_SUFFIX__ -# define __int8_c_suffix __INT8_C_SUFFIX__ -# endif /* __INT8_C_SUFFIX__ */ -#endif /* __INT8_TYPE__ */ - #ifdef __int_least8_t -# ifdef __int8_c_suffix -# define INT8_C(v) __int_c(v, __int8_c_suffix) -# define UINT8_C(v) __uint_c(v, __int8_c_suffix) -# else -# define INT8_C(v) v -# define UINT8_C(v) v ## U -# endif /* __int8_c_suffix */ +#define INT8_C(v) __INT8_C(v) +#define UINT8_C(v) __UINT8_C(v) #endif /* __int_least8_t */ @@ -938,8 +827,8 @@ typedef __UINTMAX_TYPE__ uintmax_t; #endif /* 7.18.4.2 Macros for greatest-width integer constants. */ -#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__) -#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__) +#define INTMAX_C(v) __INTMAX_C(v) +#define UINTMAX_C(v) __UINTMAX_C(v) /* C23 7.22.3.x Width of other integer types. 
*/ #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L diff --git a/lib/include/vecintrin.h b/lib/include/vecintrin.h index a14c39f9f7..338ea51ce8 100644 --- a/lib/include/vecintrin.h +++ b/lib/include/vecintrin.h @@ -7,6 +7,9 @@ *===-----------------------------------------------------------------------=== */ +#ifndef _VECINTRIN_H +#define _VECINTRIN_H + #if defined(__s390x__) && defined(__VEC__) #define __ATTRS_ai __attribute__((__always_inline__)) @@ -12861,3 +12864,5 @@ vec_search_string_until_zero_cc(__vector unsigned int __a, #error "Use -fzvector to enable vector extensions" #endif + +#endif /* _VECINTRIN_H */ diff --git a/lib/include/x86gprintrin.h b/lib/include/x86gprintrin.h index 3d5cc606d7..8d513ceffb 100644 --- a/lib/include/x86gprintrin.h +++ b/lib/include/x86gprintrin.h @@ -10,33 +10,19 @@ #ifndef __X86GPRINTRIN_H #define __X86GPRINTRIN_H -#if !defined(__SCE__) || __has_feature(modules) || defined(__HRESET__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__UINTR__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__USERMSR__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CRC32__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RAOINT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CMPCCXADD__) #include -#endif #if defined(__i386__) #define __SAVE_GPRBX "mov {%%ebx, %%eax |eax, ebx};" diff --git a/lib/include/x86intrin.h b/lib/include/x86intrin.h index f42e9e580f..aaa84365ce 100644 --- a/lib/include/x86intrin.h +++ b/lib/include/x86intrin.h @@ -14,40 +14,22 @@ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE4A__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA4__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__XOP__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__TBM__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__LWP__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__MWAITX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLZERO__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPRU__) #include -#endif #endif /* __X86INTRIN_H */ diff --git a/lib/include/xmmintrin.h b/lib/include/xmmintrin.h index 20e66d1901..6a64369773 100644 --- a/lib/include/xmmintrin.h +++ b/lib/include/xmmintrin.h @@ -2198,8 +2198,9 @@ _mm_storer_ps(float *__p, __m128 __a) #define _MM_HINT_NTA 0 #ifndef _MSC_VER -/* FIXME: We have to #define this because "sel" must be a constant integer, and - Sema doesn't do any form of constant propagation yet. */ +// If _MSC_VER is defined, we use the builtin variant of _mm_prefetch. +// Otherwise, we provide this macro, which includes a cast, allowing the user +// to pass a pointer of any time. The _mm_prefetch accepts char to match MSVC. /// Loads one cache line of data from the specified address to a location /// closer to the processor. 
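As an aside on the lzcntintrin.h hunk above: with the __target__("lzcnt") attribute dropped, the _lzcnt_* intrinsics become usable even when the LZCNT feature is not enabled, since (per the new header comment) they lower to llvm.ctlz with a defined result for a zero input, which the backend can turn into a BSR-based sequence on older targets. A minimal sketch of what that permits (hypothetical usage, not part of the patch):

    #include <immintrin.h>

    /* Hypothetical example. Well-defined for x == 0 (returns 32); on targets
       without LZCNT the compiler may emit a BSR-based fallback instead, as
       described in the updated lzcntintrin.h comment. */
    unsigned leading_zeros_u32(unsigned x) {
        return _lzcnt_u32(x);
    }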
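The same batch of header updates adds the new stdcountof.h, whose only job is to map countof onto the _Countof operator. A quick illustration of the intended use (again hypothetical, assuming a compiler that implements _Countof from the C2y drafts, as Clang 21 does):

    /* Hypothetical example, not part of the patch series. */
    #include <stdcountof.h>
    #include <stdio.h>

    int main(void) {
        int samples[8] = {0};
        /* countof expands to _Countof and yields the element count as a size_t. */
        printf("%zu\n", countof(samples)); /* prints 8 */
        return 0;
    }

Keeping the header to a single #define follows the pattern of stdalign.h and stdbool.h, which likewise exposed keyword functionality under conventional spellings.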
From e84e9d3a01e4332ad6b7a239c74d823f283d7f8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Wed, 16 Jul 2025 10:35:26 +0200 Subject: [PATCH 04/43] libcxxabi: update to LLVM 21 --- lib/libcxxabi/src/cxa_default_handlers.cpp | 3 +- lib/libcxxabi/src/demangle/DemangleConfig.h | 8 ++ lib/libcxxabi/src/demangle/ItaniumDemangle.h | 132 +++++++++++-------- lib/libcxxabi/src/demangle/Utility.h | 28 +++- lib/libcxxabi/src/stdlib_new_delete.cpp | 23 ++-- 5 files changed, 124 insertions(+), 70 deletions(-) diff --git a/lib/libcxxabi/src/cxa_default_handlers.cpp b/lib/libcxxabi/src/cxa_default_handlers.cpp index b029982ea8..97a6765fde 100644 --- a/lib/libcxxabi/src/cxa_default_handlers.cpp +++ b/lib/libcxxabi/src/cxa_default_handlers.cpp @@ -9,6 +9,7 @@ // new_handler. //===----------------------------------------------------------------------===// +#include // std::abort #include #include #include "abort_message.h" @@ -94,7 +95,7 @@ static void demangling_unexpected_handler() static constexpr std::terminate_handler default_terminate_handler = demangling_terminate_handler; static constexpr std::terminate_handler default_unexpected_handler = demangling_unexpected_handler; #else // !LIBCXXABI_SILENT_TERMINATE -static constexpr std::terminate_handler default_terminate_handler = ::abort; +static constexpr std::terminate_handler default_terminate_handler = std::abort; static constexpr std::terminate_handler default_unexpected_handler = std::terminate; #endif // !LIBCXXABI_SILENT_TERMINATE diff --git a/lib/libcxxabi/src/demangle/DemangleConfig.h b/lib/libcxxabi/src/demangle/DemangleConfig.h index 06fd223f55..7904e9d1eb 100644 --- a/lib/libcxxabi/src/demangle/DemangleConfig.h +++ b/lib/libcxxabi/src/demangle/DemangleConfig.h @@ -19,6 +19,14 @@ #include "../abort_message.h" #endif +#ifndef _LIBCPP_LOG_HARDENING_FAILURE +// Libc++abi does not have any functionality to log and continue, so we drop +// error messages when we build the demangler with `observe` assertion semantic. +// Once the layering with libc++ is improved, this could use the libc++ +// functionality to log hardening failures. +#define _LIBCPP_LOG_HARDENING_FAILURE(message) ((void)0) +#endif + #include #ifdef _MSC_VER diff --git a/lib/libcxxabi/src/demangle/ItaniumDemangle.h b/lib/libcxxabi/src/demangle/ItaniumDemangle.h index 3df41b5f4d..b306b20134 100644 --- a/lib/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/lib/libcxxabi/src/demangle/ItaniumDemangle.h @@ -21,6 +21,7 @@ #include "Utility.h" #include #include +#include #include #include #include @@ -38,8 +39,10 @@ DEMANGLE_NAMESPACE_BEGIN template class PODSmallVector { - static_assert(std::is_trivial::value, - "T is required to be a trivial type"); + static_assert(std::is_trivially_copyable::value, + "T is required to be a trivially copyable type"); + static_assert(std::is_trivially_default_constructible::value, + "T is required to be trivially default constructible"); T *First = nullptr; T *Last = nullptr; T *Cap = nullptr; @@ -162,18 +165,18 @@ class NodeArray; // traversed by the printLeft/Right functions to produce a demangled string. class Node { public: - enum Kind : unsigned char { + enum Kind : uint8_t { #define NODE(NodeKind) K##NodeKind, #include "ItaniumNodes.def" }; /// Three-way bool to track a cached value. Unknown is possible if this node /// has an unexpanded parameter pack below it that may affect this cache. 
- enum class Cache : unsigned char { Yes, No, Unknown, }; + enum class Cache : uint8_t { Yes, No, Unknown, }; /// Operator precedence for expression nodes. Used to determine required /// parens in expression emission. - enum class Prec { + enum class Prec : uint8_t { Primary, Postfix, Unary, @@ -281,20 +284,11 @@ public: } void print(OutputBuffer &OB) const { - printLeft(OB); + OB.printLeft(*this); if (RHSComponentCache != Cache::No) - printRight(OB); + OB.printRight(*this); } - // Print the "left" side of this Node into OutputBuffer. - virtual void printLeft(OutputBuffer &) const = 0; - - // Print the "right". This distinction is necessary to represent C++ types - // that appear on the RHS of their subtype, such as arrays or functions. - // Since most types don't have such a component, provide a default - // implementation. - virtual void printRight(OutputBuffer &) const {} - // Print an initializer list of this type. Returns true if we printed a custom // representation, false if nothing has been printed and the default // representation should be used. @@ -310,6 +304,24 @@ public: #ifndef NDEBUG DEMANGLE_DUMP_METHOD void dump() const; #endif + +private: + friend class OutputBuffer; + + // Print the "left" side of this Node into OutputBuffer. + // + // Note, should only be called from OutputBuffer implementations. + // Call \ref OutputBuffer::printLeft instead. + virtual void printLeft(OutputBuffer &) const = 0; + + // Print the "right". This distinction is necessary to represent C++ types + // that appear on the RHS of their subtype, such as arrays or functions. + // Since most types don't have such a component, provide a default + // implementation. + // + // Note, should only be called from OutputBuffer implementations. + // Call \ref OutputBuffer::printRight instead. + virtual void printRight(OutputBuffer &) const {} }; class NodeArray { @@ -458,11 +470,11 @@ public: } void printLeft(OutputBuffer &OB) const override { - Child->printLeft(OB); + OB.printLeft(*Child); printQuals(OB); } - void printRight(OutputBuffer &OB) const override { Child->printRight(OB); } + void printRight(OutputBuffer &OB) const override { OB.printRight(*Child); } }; class ConversionOperatorType final : public Node { @@ -491,7 +503,7 @@ public: template void match(Fn F) const { F(Ty, Postfix); } void printLeft(OutputBuffer &OB) const override { - Ty->printLeft(OB); + OB.printLeft(*Ty); OB += Postfix; } }; @@ -577,7 +589,7 @@ struct AbiTagAttr : Node { std::string_view getBaseName() const override { return Base->getBaseName(); } void printLeft(OutputBuffer &OB) const override { - Base->printLeft(OB); + OB.printLeft(*Base); OB += "[abi:"; OB += Tag; OB += "]"; @@ -603,8 +615,6 @@ class ObjCProtoName : public Node { const Node *Ty; std::string_view Protocol; - friend class PointerType; - public: ObjCProtoName(const Node *Ty_, std::string_view Protocol_) : Node(KObjCProtoName), Ty(Ty_), Protocol(Protocol_) {} @@ -616,6 +626,8 @@ public: static_cast(Ty)->getName() == "objc_object"; } + std::string_view getProtocol() const { return Protocol; } + void printLeft(OutputBuffer &OB) const override { Ty->print(OB); OB += "<"; @@ -644,7 +656,7 @@ public: // We rewrite objc_object* into id. 
if (Pointee->getKind() != KObjCProtoName || !static_cast(Pointee)->isObjCObject()) { - Pointee->printLeft(OB); + OB.printLeft(*Pointee); if (Pointee->hasArray(OB)) OB += " "; if (Pointee->hasArray(OB) || Pointee->hasFunction(OB)) @@ -653,7 +665,7 @@ public: } else { const auto *objcProto = static_cast(Pointee); OB += "id<"; - OB += objcProto->Protocol; + OB += objcProto->getProtocol(); OB += ">"; } } @@ -663,7 +675,7 @@ public: !static_cast(Pointee)->isObjCObject()) { if (Pointee->hasArray(OB) || Pointee->hasFunction(OB)) OB += ")"; - Pointee->printRight(OB); + OB.printRight(*Pointee); } } }; @@ -729,7 +741,7 @@ public: std::pair Collapsed = collapse(OB); if (!Collapsed.second) return; - Collapsed.second->printLeft(OB); + OB.printLeft(*Collapsed.second); if (Collapsed.second->hasArray(OB)) OB += " "; if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB)) @@ -746,7 +758,7 @@ public: return; if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB)) OB += ")"; - Collapsed.second->printRight(OB); + OB.printRight(*Collapsed.second); } }; @@ -766,7 +778,7 @@ public: } void printLeft(OutputBuffer &OB) const override { - MemberType->printLeft(OB); + OB.printLeft(*MemberType); if (MemberType->hasArray(OB) || MemberType->hasFunction(OB)) OB += "("; else @@ -778,7 +790,7 @@ public: void printRight(OutputBuffer &OB) const override { if (MemberType->hasArray(OB) || MemberType->hasFunction(OB)) OB += ")"; - MemberType->printRight(OB); + OB.printRight(*MemberType); } }; @@ -798,7 +810,7 @@ public: bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } bool hasArraySlow(OutputBuffer &) const override { return true; } - void printLeft(OutputBuffer &OB) const override { Base->printLeft(OB); } + void printLeft(OutputBuffer &OB) const override { OB.printLeft(*Base); } void printRight(OutputBuffer &OB) const override { if (OB.back() != ']') @@ -807,7 +819,7 @@ public: if (Dimension) Dimension->print(OB); OB += "]"; - Base->printRight(OB); + OB.printRight(*Base); } bool printInitListAsType(OutputBuffer &OB, @@ -851,7 +863,7 @@ public: // by printing out the return types's left, then print our parameters, then // finally print right of the return type. 
void printLeft(OutputBuffer &OB) const override { - Ret->printLeft(OB); + OB.printLeft(*Ret); OB += " "; } @@ -859,7 +871,7 @@ public: OB.printOpen(); Params.printWithComma(OB); OB.printClose(); - Ret->printRight(OB); + OB.printRight(*Ret); if (CVQuals & QualConst) OB += " const"; @@ -964,6 +976,8 @@ public: FunctionRefQual getRefQual() const { return RefQual; } NodeArray getParams() const { return Params; } const Node *getReturnType() const { return Ret; } + const Node *getAttrs() const { return Attrs; } + const Node *getRequires() const { return Requires; } bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } bool hasFunctionSlow(OutputBuffer &) const override { return true; } @@ -972,10 +986,11 @@ public: void printLeft(OutputBuffer &OB) const override { if (Ret) { - Ret->printLeft(OB); + OB.printLeft(*Ret); if (!Ret->hasRHSComponent(OB)) OB += " "; } + Name->print(OB); } @@ -983,8 +998,9 @@ public: OB.printOpen(); Params.printWithComma(OB); OB.printClose(); + if (Ret) - Ret->printRight(OB); + OB.printRight(*Ret); if (CVQuals & QualConst) OB += " const"; @@ -1324,14 +1340,14 @@ public: template void match(Fn F) const { F(Name, Type); } void printLeft(OutputBuffer &OB) const override { - Type->printLeft(OB); + OB.printLeft(*Type); if (!Type->hasRHSComponent(OB)) OB += " "; } void printRight(OutputBuffer &OB) const override { Name->print(OB); - Type->printRight(OB); + OB.printRight(*Type); } }; @@ -1376,11 +1392,11 @@ public: template void match(Fn F) const { F(Param); } void printLeft(OutputBuffer &OB) const override { - Param->printLeft(OB); + OB.printLeft(*Param); OB += "..."; } - void printRight(OutputBuffer &OB) const override { Param->printRight(OB); } + void printRight(OutputBuffer &OB) const override { OB.printRight(*Param); } }; /// An unexpanded parameter pack (either in the expression or type context). 
If @@ -1445,13 +1461,13 @@ public: initializePackExpansion(OB); size_t Idx = OB.CurrentPackIndex; if (Idx < Data.size()) - Data[Idx]->printLeft(OB); + OB.printLeft(*Data[Idx]); } void printRight(OutputBuffer &OB) const override { initializePackExpansion(OB); size_t Idx = OB.CurrentPackIndex; if (Idx < Data.size()) - Data[Idx]->printRight(OB); + OB.printRight(*Data[Idx]); } }; @@ -1609,13 +1625,13 @@ struct ForwardTemplateReference : Node { if (Printing) return; ScopedOverride SavePrinting(Printing, true); - Ref->printLeft(OB); + OB.printLeft(*Ref); } void printRight(OutputBuffer &OB) const override { if (Printing) return; ScopedOverride SavePrinting(Printing, true); - Ref->printRight(OB); + OB.printRight(*Ref); } }; @@ -1767,7 +1783,7 @@ public: void printLeft(OutputBuffer &OB) const override { OB += "~"; - Base->printLeft(OB); + OB.printLeft(*Base); } }; @@ -2047,7 +2063,7 @@ public: { ScopedOverride LT(OB.GtIsGt, 0); OB += "<"; - To->printLeft(OB); + OB.printLeft(*To); OB += ">"; } OB.printOpen(); @@ -3406,7 +3422,7 @@ const typename AbstractManglingParser< {"or", OperatorInfo::Binary, false, Node::Prec::Ior, "operator|"}, {"pL", OperatorInfo::Binary, false, Node::Prec::Assign, "operator+="}, {"pl", OperatorInfo::Binary, false, Node::Prec::Additive, "operator+"}, - {"pm", OperatorInfo::Member, /*Named*/ false, Node::Prec::PtrMem, + {"pm", OperatorInfo::Member, /*Named*/ true, Node::Prec::PtrMem, "operator->*"}, {"pp", OperatorInfo::Postfix, false, Node::Prec::Postfix, "operator++"}, {"ps", OperatorInfo::Prefix, false, Node::Prec::Unary, "operator+"}, @@ -4452,7 +4468,9 @@ Node *AbstractManglingParser::parseType() { return nullptr; if (!consumeIf('_')) return nullptr; - return make(Size, Signed); + // The front end expects this to be available for Substitution + Result = make(Size, Signed); + break; } // ::= Di # char32_t case 'i': @@ -5739,14 +5757,16 @@ struct FloatData template <> struct FloatData { -#if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) || defined(__riscv) || defined(__loongarch__) || \ - defined(__ve__) - static const size_t mangled_size = 32; -#elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) - static const size_t mangled_size = 16; +#if __LDBL_MANT_DIG__ == 113 || __LDBL_MANT_DIG__ == 106 + static const size_t mangled_size = 32; +#elif __LDBL_MANT_DIG__ == 53 || defined(_MSC_VER) + // MSVC doesn't define __LDBL_MANT_DIG__, but it has long double equal to + // regular double on all current architectures. + static const size_t mangled_size = 16; +#elif __LDBL_MANT_DIG__ == 64 + static const size_t mangled_size = 20; #else - static const size_t mangled_size = 20; // May need to be adjusted to 16 or 24 on other platforms +#error Unknown size for __LDBL_MANT_DIG__ #endif // `-0x1.ffffffffffffffffffffffffffffp+16383` + 'L' + '\0' == 42 bytes. // 28 'f's * 4 bits == 112 bits, which is the number of mantissa bits. 
@@ -6176,6 +6196,10 @@ struct ManglingParser : AbstractManglingParser, Alloc> { Alloc>::AbstractManglingParser; }; +inline void OutputBuffer::printLeft(const Node &N) { N.printLeft(*this); } + +inline void OutputBuffer::printRight(const Node &N) { N.printRight(*this); } + DEMANGLE_NAMESPACE_END #if defined(__clang__) diff --git a/lib/libcxxabi/src/demangle/Utility.h b/lib/libcxxabi/src/demangle/Utility.h index f1fad35d60..8829f3fa13 100644 --- a/lib/libcxxabi/src/demangle/Utility.h +++ b/lib/libcxxabi/src/demangle/Utility.h @@ -27,6 +27,8 @@ DEMANGLE_NAMESPACE_BEGIN +class Node; + // Stream that AST nodes write their string representation into after the AST // has been parsed. class OutputBuffer { @@ -79,10 +81,24 @@ public: OutputBuffer(const OutputBuffer &) = delete; OutputBuffer &operator=(const OutputBuffer &) = delete; + virtual ~OutputBuffer() {} + operator std::string_view() const { return std::string_view(Buffer, CurrentPosition); } + /// Called by the demangler when printing the demangle tree. By + /// default calls into \c Node::print{Left|Right} but can be overriden + /// by clients to track additional state when printing the demangled name. + virtual void printLeft(const Node &N); + virtual void printRight(const Node &N); + + /// Called when we write to this object anywhere other than the end. + virtual void notifyInsertion(size_t /*Position*/, size_t /*Count*/) {} + + /// Called when we make the \c CurrentPosition of this object smaller. + virtual void notifyDeletion(size_t /*OldPos*/, size_t /*NewPos*/) {} + /// If a ParameterPackExpansion (or similar type) is encountered, the offset /// into the pack that we're currently printing. unsigned CurrentPackIndex = std::numeric_limits::max(); @@ -120,12 +136,16 @@ public: OutputBuffer &prepend(std::string_view R) { size_t Size = R.size(); + if (!Size) + return *this; grow(Size); std::memmove(Buffer + Size, Buffer, CurrentPosition); std::memcpy(Buffer, &*R.begin(), Size); CurrentPosition += Size; + notifyInsertion(/*Position=*/0, /*Count=*/Size); + return *this; } @@ -161,14 +181,20 @@ public: DEMANGLE_ASSERT(Pos <= CurrentPosition, ""); if (N == 0) return; + grow(N); std::memmove(Buffer + Pos + N, Buffer + Pos, CurrentPosition - Pos); std::memcpy(Buffer + Pos, S, N); CurrentPosition += N; + + notifyInsertion(Pos, N); } size_t getCurrentPosition() const { return CurrentPosition; } - void setCurrentPosition(size_t NewPos) { CurrentPosition = NewPos; } + void setCurrentPosition(size_t NewPos) { + notifyDeletion(CurrentPosition, NewPos); + CurrentPosition = NewPos; + } char back() const { DEMANGLE_ASSERT(CurrentPosition, ""); diff --git a/lib/libcxxabi/src/stdlib_new_delete.cpp b/lib/libcxxabi/src/stdlib_new_delete.cpp index f386b28f0c..b5ed59958d 100644 --- a/lib/libcxxabi/src/stdlib_new_delete.cpp +++ b/lib/libcxxabi/src/stdlib_new_delete.cpp @@ -63,7 +63,7 @@ static void* operator_new_impl(std::size_t size) { return p; } -_LIBCPP_MAKE_OVERRIDABLE_FUNCTION_DETECTABLE _LIBCPP_WEAK void* operator new(std::size_t size) _THROW_BAD_ALLOC { +_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new, (std::size_t size)) _THROW_BAD_ALLOC { void* p = operator_new_impl(size); if (p == nullptr) __throw_bad_alloc_shim(); @@ -74,7 +74,7 @@ _LIBCPP_WEAK void* operator new(size_t size, const std::nothrow_t&) noexcept { #if !_LIBCPP_HAS_EXCEPTIONS # if _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION _LIBCPP_ASSERT_SHIM( - !std::__is_function_overridden(static_cast(&operator new)), + (!std::__is_function_overridden < void*(std::size_t), &operator new>()), "libc++ was 
configured with exceptions disabled and `operator new(size_t)` has been overridden, " "but `operator new(size_t, nothrow_t)` has not been overridden. This is problematic because " "`operator new(size_t, nothrow_t)` must call `operator new(size_t)`, which will terminate in case " @@ -94,15 +94,13 @@ _LIBCPP_WEAK void* operator new(size_t size, const std::nothrow_t&) noexcept { #endif } -_LIBCPP_MAKE_OVERRIDABLE_FUNCTION_DETECTABLE _LIBCPP_WEAK void* operator new[](size_t size) _THROW_BAD_ALLOC { - return ::operator new(size); -} +_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new[], (size_t size)) _THROW_BAD_ALLOC { return ::operator new(size); } _LIBCPP_WEAK void* operator new[](size_t size, const std::nothrow_t&) noexcept { #if !_LIBCPP_HAS_EXCEPTIONS # if _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION _LIBCPP_ASSERT_SHIM( - !std::__is_function_overridden(static_cast(&operator new[])), + (!std::__is_function_overridden < void*(std::size_t), &operator new[]>()), "libc++ was configured with exceptions disabled and `operator new[](size_t)` has been overridden, " "but `operator new[](size_t, nothrow_t)` has not been overridden. This is problematic because " "`operator new[](size_t, nothrow_t)` must call `operator new[](size_t)`, which will terminate in case " @@ -156,8 +154,7 @@ static void* operator_new_aligned_impl(std::size_t size, std::align_val_t alignm return p; } -_LIBCPP_MAKE_OVERRIDABLE_FUNCTION_DETECTABLE _LIBCPP_WEAK void* -operator new(std::size_t size, std::align_val_t alignment) _THROW_BAD_ALLOC { +_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new, (std::size_t size, std::align_val_t alignment)) _THROW_BAD_ALLOC { void* p = operator_new_aligned_impl(size, alignment); if (p == nullptr) __throw_bad_alloc_shim(); @@ -168,7 +165,7 @@ _LIBCPP_WEAK void* operator new(size_t size, std::align_val_t alignment, const s # if !_LIBCPP_HAS_EXCEPTIONS # if _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION _LIBCPP_ASSERT_SHIM( - !std::__is_function_overridden(static_cast(&operator new)), + (!std::__is_function_overridden < void*(std::size_t, std::align_val_t), &operator new>()), "libc++ was configured with exceptions disabled and `operator new(size_t, align_val_t)` has been overridden, " "but `operator new(size_t, align_val_t, nothrow_t)` has not been overridden. This is problematic because " "`operator new(size_t, align_val_t, nothrow_t)` must call `operator new(size_t, align_val_t)`, which will " @@ -188,8 +185,7 @@ _LIBCPP_WEAK void* operator new(size_t size, std::align_val_t alignment, const s # endif } -_LIBCPP_MAKE_OVERRIDABLE_FUNCTION_DETECTABLE _LIBCPP_WEAK void* -operator new[](size_t size, std::align_val_t alignment) _THROW_BAD_ALLOC { +_LIBCPP_OVERRIDABLE_FUNCTION(void*, operator new[], (size_t size, std::align_val_t alignment)) _THROW_BAD_ALLOC { return ::operator new(size, alignment); } @@ -197,14 +193,13 @@ _LIBCPP_WEAK void* operator new[](size_t size, std::align_val_t alignment, const # if !_LIBCPP_HAS_EXCEPTIONS # if _LIBCPP_CAN_DETECT_OVERRIDDEN_FUNCTION _LIBCPP_ASSERT_SHIM( - !std::__is_function_overridden(static_cast(&operator new[])), + (!std::__is_function_overridden < void*(std::size_t, std::align_val_t), &operator new[]>()), "libc++ was configured with exceptions disabled and `operator new[](size_t, align_val_t)` has been overridden, " "but `operator new[](size_t, align_val_t, nothrow_t)` has not been overridden. 
This is problematic because " "`operator new[](size_t, align_val_t, nothrow_t)` must call `operator new[](size_t, align_val_t)`, which will " "terminate in case it fails to allocate, making it impossible for `operator new[](size_t, align_val_t, " "nothrow_t)` to fulfill its contract (since it should return nullptr upon failure). Please make sure you " - "override " - "`operator new[](size_t, align_val_t, nothrow_t)` as well."); + "override `operator new[](size_t, align_val_t, nothrow_t)` as well."); # endif return operator_new_aligned_impl(size, alignment); From d9f0fbf9838060b1e8c2ec0df21b43e75430350f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Wed, 16 Jul 2025 10:46:24 +0200 Subject: [PATCH 05/43] libcxx: update to LLVM 21 --- lib/libcxx/include/__algorithm/copy.h | 134 +- .../include/__algorithm/copy_backward.h | 131 + lib/libcxx/include/__algorithm/count.h | 10 +- lib/libcxx/include/__algorithm/equal.h | 160 + lib/libcxx/include/__algorithm/fill_n.h | 12 +- lib/libcxx/include/__algorithm/find.h | 10 +- lib/libcxx/include/__algorithm/for_each.h | 47 +- lib/libcxx/include/__algorithm/for_each_n.h | 77 +- .../include/__algorithm/for_each_n_segment.h | 63 + .../include/__algorithm/inplace_merge.h | 11 +- lib/libcxx/include/__algorithm/min_element.h | 2 +- lib/libcxx/include/__algorithm/move.h | 10 + .../include/__algorithm/move_backward.h | 10 + .../include/__algorithm/out_value_result.h | 56 + lib/libcxx/include/__algorithm/radix_sort.h | 125 +- .../include/__algorithm/ranges_for_each.h | 18 +- .../include/__algorithm/ranges_for_each_n.h | 9 +- .../__algorithm/ranges_inplace_merge.h | 6 +- .../__algorithm/ranges_iterator_concept.h | 2 +- lib/libcxx/include/__algorithm/ranges_max.h | 6 +- .../include/__algorithm/ranges_max_element.h | 6 +- lib/libcxx/include/__algorithm/ranges_min.h | 6 +- .../include/__algorithm/ranges_min_element.h | 19 +- .../__algorithm/ranges_stable_partition.h | 7 +- .../include/__algorithm/ranges_stable_sort.h | 8 +- lib/libcxx/include/__algorithm/rotate.h | 45 + lib/libcxx/include/__algorithm/simd_utils.h | 16 +- lib/libcxx/include/__algorithm/sort.h | 39 +- .../include/__algorithm/stable_partition.h | 21 +- lib/libcxx/include/__algorithm/stable_sort.h | 28 +- lib/libcxx/include/__algorithm/swap_ranges.h | 162 + lib/libcxx/include/__assert | 4 +- lib/libcxx/include/__assertion_handler | 15 +- lib/libcxx/include/__atomic/atomic.h | 21 +- lib/libcxx/include/__atomic/atomic_ref.h | 2 +- lib/libcxx/include/__atomic/memory_order.h | 4 +- lib/libcxx/include/__atomic/support.h | 3 - lib/libcxx/include/__atomic/support/c11.h | 2 +- lib/libcxx/include/__bit/bit_ceil.h | 4 +- lib/libcxx/include/__bit/bit_floor.h | 5 +- lib/libcxx/include/__bit/bit_log2.h | 10 +- lib/libcxx/include/__bit/bit_width.h | 4 +- lib/libcxx/include/__bit/countl.h | 72 +- lib/libcxx/include/__bit/countr.h | 45 +- lib/libcxx/include/__bit/has_single_bit.h | 4 +- lib/libcxx/include/__bit/popcount.h | 44 +- lib/libcxx/include/__bit/rotate.h | 11 +- lib/libcxx/include/__bit_reference | 708 +--- lib/libcxx/include/__charconv/tables.h | 20 +- .../include/__charconv/to_chars_base_10.h | 32 +- .../include/__charconv/to_chars_integral.h | 87 +- .../include/__charconv/to_chars_result.h | 9 + lib/libcxx/include/__charconv/traits.h | 34 +- lib/libcxx/include/__chrono/convert_to_tm.h | 34 +- lib/libcxx/include/__chrono/duration.h | 10 +- lib/libcxx/include/__chrono/formatter.h | 78 +- lib/libcxx/include/__chrono/gps_clock.h | 90 + lib/libcxx/include/__chrono/ostream.h | 14 + 
.../include/__chrono/parser_std_format_spec.h | 2 +- lib/libcxx/include/__chrono/tai_clock.h | 108 + lib/libcxx/include/__chrono/time_point.h | 18 +- .../__compare/common_comparison_category.h | 6 +- .../include/__compare/compare_three_way.h | 2 +- .../__compare/compare_three_way_result.h | 6 +- lib/libcxx/include/__concepts/arithmetic.h | 13 - lib/libcxx/include/__concepts/class_or_enum.h | 1 - lib/libcxx/include/__concepts/common_with.h | 2 +- lib/libcxx/include/__concepts/swappable.h | 1 - .../__condition_variable/condition_variable.h | 200 +- lib/libcxx/include/__config | 348 +- lib/libcxx/include/__configuration/abi.h | 136 +- .../include/__configuration/availability.h | 65 +- lib/libcxx/include/__configuration/compiler.h | 4 +- lib/libcxx/include/__configuration/platform.h | 7 + .../include/__coroutine/coroutine_handle.h | 8 +- .../include/__coroutine/coroutine_traits.h | 2 +- .../__coroutine/noop_coroutine_handle.h | 4 +- .../include/__coroutine/trivial_awaitables.h | 2 +- lib/libcxx/include/__cstddef/byte.h | 4 +- lib/libcxx/include/__debug_utils/sanitizers.h | 10 +- lib/libcxx/include/__exception/exception.h | 4 +- .../include/__exception/exception_ptr.h | 74 +- .../include/__exception/nested_exception.h | 4 +- lib/libcxx/include/__exception/operations.h | 4 +- lib/libcxx/include/__exception/terminate.h | 4 +- lib/libcxx/include/__expected/expected.h | 45 +- .../include/__filesystem/directory_entry.h | 2 +- lib/libcxx/include/__filesystem/operations.h | 6 +- lib/libcxx/include/__filesystem/path.h | 5 +- lib/libcxx/include/__filesystem/u8path.h | 7 +- lib/libcxx/include/__flat_map/flat_map.h | 453 ++- lib/libcxx/include/__flat_map/flat_multimap.h | 27 +- .../include/__flat_map/key_value_iterator.h | 86 +- lib/libcxx/include/__flat_map/utils.h | 26 +- lib/libcxx/include/__flat_set/flat_multiset.h | 792 ++++ lib/libcxx/include/__flat_set/flat_set.h | 874 +++++ lib/libcxx/include/__flat_set/ra_iterator.h | 157 + lib/libcxx/include/__flat_set/utils.h | 82 + lib/libcxx/include/__format/buffer.h | 29 +- .../include/__format/container_adaptor.h | 8 +- .../include/__format/escaped_output_table.h | 82 +- .../extended_grapheme_cluster_table.h | 99 +- lib/libcxx/include/__format/format_arg.h | 6 +- .../include/__format/format_arg_store.h | 38 +- lib/libcxx/include/__format/format_args.h | 2 +- lib/libcxx/include/__format/format_context.h | 13 +- .../include/__format/format_functions.h | 53 +- .../include/__format/format_parse_context.h | 2 +- lib/libcxx/include/__format/format_string.h | 2 +- .../include/__format/format_to_n_result.h | 2 +- lib/libcxx/include/__format/formatter.h | 16 +- lib/libcxx/include/__format/formatter_bool.h | 2 +- lib/libcxx/include/__format/formatter_char.h | 8 +- .../__format/formatter_floating_point.h | 11 +- .../include/__format/formatter_integer.h | 26 +- .../include/__format/formatter_integral.h | 8 +- .../include/__format/formatter_output.h | 18 - .../include/__format/formatter_pointer.h | 8 +- .../include/__format/formatter_string.h | 32 +- lib/libcxx/include/__format/formatter_tuple.h | 8 +- .../__format/indic_conjunct_break_table.h | 312 +- .../include/__format/parser_std_format_spec.h | 2 +- .../__format/range_default_formatter.h | 14 +- lib/libcxx/include/__format/range_formatter.h | 2 +- .../include/__format/width_estimation_table.h | 19 +- .../include/__functional/binary_function.h | 7 +- .../include/__functional/binary_negate.h | 2 +- lib/libcxx/include/__functional/bind.h | 6 +- lib/libcxx/include/__functional/binder1st.h | 2 +- 
lib/libcxx/include/__functional/binder2nd.h | 2 +- .../__functional/boyer_moore_searcher.h | 16 +- .../include/__functional/default_searcher.h | 2 +- lib/libcxx/include/__functional/function.h | 383 +- lib/libcxx/include/__functional/hash.h | 170 +- lib/libcxx/include/__functional/mem_fun_ref.h | 17 +- lib/libcxx/include/__functional/operations.h | 81 +- .../__functional/pointer_to_binary_function.h | 3 +- .../__functional/pointer_to_unary_function.h | 3 +- .../include/__functional/reference_wrapper.h | 48 +- .../include/__functional/unary_function.h | 7 +- .../include/__functional/unary_negate.h | 3 +- .../include/__functional/weak_result_type.h | 2 + lib/libcxx/include/__fwd/array.h | 2 +- lib/libcxx/include/__fwd/bit_reference.h | 16 + lib/libcxx/include/__fwd/byte.h | 4 +- lib/libcxx/include/__fwd/complex.h | 2 +- lib/libcxx/include/__fwd/deque.h | 2 +- lib/libcxx/include/__fwd/format.h | 6 +- lib/libcxx/include/__fwd/fstream.h | 8 +- lib/libcxx/include/__fwd/functional.h | 13 +- lib/libcxx/include/__fwd/ios.h | 2 +- lib/libcxx/include/__fwd/istream.h | 4 +- lib/libcxx/include/__fwd/map.h | 31 + lib/libcxx/include/__fwd/memory.h | 4 +- lib/libcxx/include/__fwd/memory_resource.h | 2 +- lib/libcxx/include/__fwd/ostream.h | 2 +- lib/libcxx/include/__fwd/pair.h | 8 +- lib/libcxx/include/__fwd/queue.h | 4 +- lib/libcxx/include/__fwd/set.h | 30 + lib/libcxx/include/__fwd/sstream.h | 8 +- lib/libcxx/include/__fwd/stack.h | 2 +- lib/libcxx/include/__fwd/streambuf.h | 2 +- lib/libcxx/include/__fwd/string.h | 4 +- lib/libcxx/include/__fwd/string_view.h | 2 +- lib/libcxx/include/__fwd/subrange.h | 2 +- lib/libcxx/include/__fwd/tuple.h | 6 +- lib/libcxx/include/__fwd/variant.h | 31 +- lib/libcxx/include/__fwd/vector.h | 2 +- lib/libcxx/include/__hash_table | 185 +- lib/libcxx/include/__ios/fpos.h | 2 +- lib/libcxx/include/__iterator/advance.h | 16 +- .../include/__iterator/aliasing_iterator.h | 9 +- .../include/__iterator/back_insert_iterator.h | 2 +- .../include/__iterator/common_iterator.h | 7 +- lib/libcxx/include/__iterator/concepts.h | 51 +- .../__iterator/front_insert_iterator.h | 2 +- .../include/__iterator/insert_iterator.h | 2 +- .../include/__iterator/istream_iterator.h | 5 +- .../include/__iterator/istreambuf_iterator.h | 2 +- lib/libcxx/include/__iterator/iter_move.h | 3 +- lib/libcxx/include/__iterator/iterator.h | 2 +- .../include/__iterator/iterator_traits.h | 197 +- lib/libcxx/include/__iterator/move_iterator.h | 2 +- lib/libcxx/include/__iterator/move_sentinel.h | 2 +- lib/libcxx/include/__iterator/next.h | 6 - .../include/__iterator/ostream_iterator.h | 2 +- .../include/__iterator/ostreambuf_iterator.h | 2 +- lib/libcxx/include/__iterator/prev.h | 5 - .../include/__iterator/product_iterator.h | 76 + .../include/__iterator/reverse_iterator.h | 2 +- .../include/__iterator/segmented_iterator.h | 6 + lib/libcxx/include/__iterator/wrap_iter.h | 6 +- lib/libcxx/include/__locale | 236 +- .../include/__locale_dir/check_grouping.h | 31 + .../include/__locale_dir/get_c_locale.h | 40 + .../include/__locale_dir/locale_base_api.h | 118 +- lib/libcxx/include/__locale_dir/messages.h | 143 + lib/libcxx/include/__locale_dir/money.h | 873 ++++ lib/libcxx/include/__locale_dir/num.h | 1072 +++++ .../include/__locale_dir/scan_keyword.h | 143 + .../include/__locale_dir/support/apple.h | 2 - .../include/__locale_dir/support/bsd_like.h | 15 +- .../include/__locale_dir/support/freebsd.h | 2 - .../include/__locale_dir/support/fuchsia.h | 4 +- .../include/__locale_dir/support/linux.h | 281 ++ 
.../include/__locale_dir/support/netbsd.h | 2 + .../support/no_locale/characters.h | 6 - .../include/__locale_dir/support/windows.h | 10 +- lib/libcxx/include/__locale_dir/time.h | 766 ++++ .../include/__locale_dir/wbuffer_convert.h | 430 ++ .../include/__locale_dir/wstring_convert.h | 254 ++ lib/libcxx/include/__log_hardening_failure | 42 + lib/libcxx/include/__math/abs.h | 24 + lib/libcxx/include/__math/copysign.h | 2 +- .../include/__math/exponential_functions.h | 4 +- lib/libcxx/include/__math/fdim.h | 4 +- lib/libcxx/include/__math/fma.h | 4 +- lib/libcxx/include/__math/hypot.h | 8 +- .../__math/inverse_trigonometric_functions.h | 4 +- lib/libcxx/include/__math/min_max.h | 8 +- lib/libcxx/include/__math/modulo.h | 4 +- lib/libcxx/include/__math/remainder.h | 8 +- .../include/__math/rounding_functions.h | 4 +- lib/libcxx/include/__math/traits.h | 20 +- lib/libcxx/include/__mbstate_t.h | 8 +- .../include/__mdspan/aligned_accessor.h | 87 + lib/libcxx/include/__mdspan/extents.h | 10 +- lib/libcxx/include/__mdspan/layout_left.h | 3 +- lib/libcxx/include/__mdspan/layout_right.h | 3 +- lib/libcxx/include/__mdspan/layout_stride.h | 10 +- lib/libcxx/include/__mdspan/mdspan.h | 12 +- lib/libcxx/include/__memory/addressof.h | 4 +- .../include/__memory/allocation_guard.h | 20 +- lib/libcxx/include/__memory/allocator.h | 6 +- lib/libcxx/include/__memory/allocator_arg_t.h | 2 +- .../include/__memory/allocator_traits.h | 207 +- lib/libcxx/include/__memory/auto_ptr.h | 4 +- lib/libcxx/include/__memory/compressed_pair.h | 67 +- lib/libcxx/include/__memory/construct_at.h | 42 +- lib/libcxx/include/__memory/destroy.h | 71 + lib/libcxx/include/__memory/inout_ptr.h | 2 +- .../__memory/is_sufficiently_aligned.h | 34 + lib/libcxx/include/__memory/out_ptr.h | 2 +- lib/libcxx/include/__memory/pointer_traits.h | 144 +- .../include/__memory/ranges_construct_at.h | 35 - lib/libcxx/include/__memory/ranges_destroy.h | 79 + .../include/__memory/raw_storage_iterator.h | 2 +- lib/libcxx/include/__memory/shared_count.h | 5 +- lib/libcxx/include/__memory/shared_ptr.h | 42 +- .../__memory/uninitialized_algorithms.h | 33 +- lib/libcxx/include/__memory/unique_ptr.h | 64 +- lib/libcxx/include/__memory/uses_allocator.h | 2 +- .../__memory/uses_allocator_construction.h | 9 +- .../__memory_resource/polymorphic_allocator.h | 4 +- lib/libcxx/include/__mutex/lock_guard.h | 10 +- lib/libcxx/include/__mutex/mutex.h | 8 +- lib/libcxx/include/__mutex/once_flag.h | 11 +- lib/libcxx/include/__mutex/unique_lock.h | 20 +- lib/libcxx/include/__new/align_val_t.h | 5 +- lib/libcxx/include/__new/allocate.h | 71 +- .../include/__new/destroying_delete_t.h | 5 +- lib/libcxx/include/__new/exceptions.h | 5 +- lib/libcxx/include/__new/new_handler.h | 5 +- lib/libcxx/include/__new/nothrow_t.h | 5 +- lib/libcxx/include/__node_handle | 13 +- lib/libcxx/include/__numeric/gcd_lcm.h | 5 +- lib/libcxx/include/__numeric/ranges_iota.h | 65 + .../include/__numeric/saturation_arithmetic.h | 37 +- lib/libcxx/include/__ostream/basic_ostream.h | 21 +- lib/libcxx/include/__ostream/print.h | 16 +- .../include/__pstl/backends/libdispatch.h | 1 + .../include/__random/bernoulli_distribution.h | 4 +- .../include/__random/binomial_distribution.h | 4 +- .../include/__random/cauchy_distribution.h | 4 +- .../__random/chi_squared_distribution.h | 4 +- .../include/__random/clamp_to_integral.h | 2 +- .../include/__random/discard_block_engine.h | 2 +- .../include/__random/discrete_distribution.h | 4 +- .../__random/exponential_distribution.h | 4 +- 
.../__random/extreme_value_distribution.h | 4 +- .../include/__random/fisher_f_distribution.h | 4 +- .../include/__random/gamma_distribution.h | 4 +- .../include/__random/geometric_distribution.h | 4 +- .../__random/independent_bits_engine.h | 2 +- .../__random/linear_congruential_engine.h | 4 +- .../include/__random/lognormal_distribution.h | 4 +- .../__random/mersenne_twister_engine.h | 4 +- .../__random/negative_binomial_distribution.h | 4 +- .../include/__random/normal_distribution.h | 4 +- .../piecewise_constant_distribution.h | 4 +- .../__random/piecewise_linear_distribution.h | 4 +- .../include/__random/poisson_distribution.h | 4 +- lib/libcxx/include/__random/seed_seq.h | 2 +- .../include/__random/shuffle_order_engine.h | 2 +- .../include/__random/student_t_distribution.h | 4 +- .../__random/subtract_with_carry_engine.h | 4 +- .../__random/uniform_real_distribution.h | 4 +- .../include/__random/weibull_distribution.h | 4 +- lib/libcxx/include/__ranges/concepts.h | 40 + lib/libcxx/include/__ranges/drop_view.h | 8 +- lib/libcxx/include/__ranges/elements_view.h | 2 +- lib/libcxx/include/__ranges/enable_view.h | 7 +- lib/libcxx/include/__ranges/join_with_view.h | 460 +++ .../include/__ranges/non_propagating_cache.h | 2 +- lib/libcxx/include/__ranges/repeat_view.h | 4 +- lib/libcxx/include/__ranges/reverse_view.h | 4 +- lib/libcxx/include/__ranges/subrange.h | 10 +- lib/libcxx/include/__ranges/take_view.h | 6 +- lib/libcxx/include/__ranges/to.h | 6 +- lib/libcxx/include/__ranges/transform_view.h | 11 +- lib/libcxx/include/__ranges/zip_view.h | 24 +- lib/libcxx/include/__split_buffer | 11 +- .../include/__stop_token/atomic_unique_lock.h | 2 +- .../__stop_token/intrusive_shared_ptr.h | 3 +- lib/libcxx/include/__string/char_traits.h | 21 +- .../include/__string/constexpr_c_functions.h | 23 +- .../include/__string/extern_template_lists.h | 165 +- .../include/__system_error/error_category.h | 4 +- .../include/__system_error/error_code.h | 4 +- .../include/__system_error/error_condition.h | 8 +- lib/libcxx/include/__thread/formatter.h | 2 +- lib/libcxx/include/__thread/id.h | 4 +- lib/libcxx/include/__thread/support/windows.h | 6 +- lib/libcxx/include/__thread/thread.h | 134 +- lib/libcxx/include/__tree | 630 ++- lib/libcxx/include/__tuple/make_tuple_types.h | 2 +- lib/libcxx/include/__tuple/sfinae_helpers.h | 2 +- lib/libcxx/include/__tuple/tuple_element.h | 10 +- lib/libcxx/include/__tuple/tuple_size.h | 25 +- .../include/__type_traits/add_cv_quals.h | 6 +- .../include/__type_traits/add_pointer.h | 18 +- ...add_lvalue_reference.h => add_reference.h} | 56 +- .../__type_traits/add_rvalue_reference.h | 54 - .../include/__type_traits/aligned_storage.h | 2 +- .../include/__type_traits/alignment_of.h | 3 +- .../include/__type_traits/common_reference.h | 34 +- .../include/__type_traits/common_type.h | 13 +- .../include/__type_traits/conditional.h | 4 +- .../include/__type_traits/container_traits.h | 3 + lib/libcxx/include/__type_traits/copy_cvref.h | 3 +- lib/libcxx/include/__type_traits/decay.h | 44 +- .../include/__type_traits/dependent_type.h | 2 +- .../include/__type_traits/desugars_to.h | 12 + lib/libcxx/include/__type_traits/enable_if.h | 4 +- lib/libcxx/include/__type_traits/extent.h | 12 +- .../has_unique_object_representation.h | 10 +- .../__type_traits/has_virtual_destructor.h | 2 +- .../include/__type_traits/integer_traits.h | 73 + .../include/__type_traits/integral_constant.h | 2 +- lib/libcxx/include/__type_traits/invoke.h | 147 +- .../include/__type_traits/is_abstract.h | 3 +- 
.../include/__type_traits/is_aggregate.h | 3 +- .../include/__type_traits/is_arithmetic.h | 4 +- lib/libcxx/include/__type_traits/is_array.h | 26 +- .../include/__type_traits/is_assignable.h | 14 +- lib/libcxx/include/__type_traits/is_base_of.h | 6 +- .../include/__type_traits/is_bounded_array.h | 21 +- .../include/__type_traits/is_char_like_type.h | 6 +- lib/libcxx/include/__type_traits/is_class.h | 2 +- .../include/__type_traits/is_compound.h | 4 +- lib/libcxx/include/__type_traits/is_const.h | 22 +- .../include/__type_traits/is_constructible.h | 17 +- .../include/__type_traits/is_convertible.h | 13 +- .../__type_traits/is_core_convertible.h | 25 +- .../include/__type_traits/is_destructible.h | 16 +- lib/libcxx/include/__type_traits/is_empty.h | 2 +- lib/libcxx/include/__type_traits/is_enum.h | 4 +- lib/libcxx/include/__type_traits/is_final.h | 4 +- .../include/__type_traits/is_floating_point.h | 11 +- .../include/__type_traits/is_function.h | 2 +- .../include/__type_traits/is_fundamental.h | 6 +- .../__type_traits/is_implicit_lifetime.h | 3 +- .../include/__type_traits/is_integral.h | 26 +- .../include/__type_traits/is_literal_type.h | 4 +- .../include/__type_traits/is_member_pointer.h | 8 +- .../__type_traits/is_nothrow_assignable.h | 20 +- .../__type_traits/is_nothrow_constructible.h | 19 +- .../__type_traits/is_nothrow_convertible.h | 62 - .../__type_traits/is_nothrow_destructible.h | 17 +- .../include/__type_traits/is_null_pointer.h | 3 +- lib/libcxx/include/__type_traits/is_object.h | 2 +- lib/libcxx/include/__type_traits/is_pod.h | 4 +- lib/libcxx/include/__type_traits/is_pointer.h | 36 +- .../include/__type_traits/is_polymorphic.h | 3 +- .../include/__type_traits/is_reference.h | 16 +- .../__type_traits/is_reference_wrapper.h | 6 +- .../include/__type_traits/is_referenceable.h | 25 +- .../include/__type_traits/is_replaceable.h | 61 + lib/libcxx/include/__type_traits/is_same.h | 2 +- lib/libcxx/include/__type_traits/is_scalar.h | 10 +- lib/libcxx/include/__type_traits/is_signed.h | 17 +- .../include/__type_traits/is_signed_integer.h | 35 - .../__type_traits/is_standard_layout.h | 3 +- .../include/__type_traits/is_swappable.h | 13 +- lib/libcxx/include/__type_traits/is_trivial.h | 7 +- .../__type_traits/is_trivially_assignable.h | 14 +- .../is_trivially_constructible.h | 17 +- .../__type_traits/is_trivially_copyable.h | 3 +- .../__type_traits/is_trivially_destructible.h | 8 +- .../__type_traits/is_unbounded_array.h | 12 +- lib/libcxx/include/__type_traits/is_union.h | 2 +- .../include/__type_traits/is_unsigned.h | 17 +- .../__type_traits/is_unsigned_integer.h | 35 - lib/libcxx/include/__type_traits/is_void.h | 2 +- .../include/__type_traits/is_volatile.h | 22 +- lib/libcxx/include/__type_traits/promote.h | 36 +- lib/libcxx/include/__type_traits/rank.h | 12 +- .../reference_constructs_from_temporary.h | 44 + .../reference_converts_from_temporary.h | 35 + .../__type_traits/remove_all_extents.h | 22 +- .../include/__type_traits/remove_const.h | 4 +- .../include/__type_traits/remove_cvref.h | 4 - .../include/__type_traits/remove_extent.h | 22 +- .../include/__type_traits/remove_pointer.h | 10 +- .../include/__type_traits/remove_volatile.h | 4 +- lib/libcxx/include/__type_traits/result_of.h | 2 +- .../include/__type_traits/strip_signature.h | 36 +- .../include/__type_traits/underlying_type.h | 13 +- lib/libcxx/include/__utility/cmp.h | 16 +- .../include/__utility/convert_to_integral.h | 2 +- .../include/__utility/exception_guard.h | 7 +- lib/libcxx/include/__utility/in_place.h | 4 +- 
.../include/__utility/integer_sequence.h | 2 +- lib/libcxx/include/__utility/no_destroy.h | 1 - lib/libcxx/include/__utility/pair.h | 180 +- .../include/__utility/piecewise_construct.h | 2 +- lib/libcxx/include/__utility/scope_guard.h | 1 - lib/libcxx/include/__utility/swap.h | 1 - lib/libcxx/include/__utility/to_underlying.h | 4 +- lib/libcxx/include/__variant/monostate.h | 12 +- .../include/__vector/container_traits.h | 4 +- lib/libcxx/include/__vector/vector.h | 126 +- lib/libcxx/include/__vector/vector_bool.h | 93 +- .../include/__vector/vector_bool_formatter.h | 2 +- lib/libcxx/include/__verbose_abort | 8 +- lib/libcxx/include/__verbose_trap | 36 + lib/libcxx/include/algorithm | 297 +- lib/libcxx/include/any | 32 +- lib/libcxx/include/array | 21 +- lib/libcxx/include/barrier | 2 +- lib/libcxx/include/bit | 2 +- lib/libcxx/include/bitset | 386 +- lib/libcxx/include/charconv | 2 +- lib/libcxx/include/chrono | 110 +- lib/libcxx/include/cmath | 8 +- lib/libcxx/include/codecvt | 39 +- lib/libcxx/include/compare | 2 +- lib/libcxx/include/complex | 30 +- lib/libcxx/include/concepts | 2 +- lib/libcxx/include/condition_variable | 67 +- lib/libcxx/include/coroutine | 2 +- lib/libcxx/include/cwchar | 3 +- lib/libcxx/include/deque | 61 +- lib/libcxx/include/execution | 2 +- lib/libcxx/include/expected | 2 +- .../include/experimental/__simd/declaration.h | 2 +- .../include/experimental/__simd/utility.h | 2 +- lib/libcxx/include/experimental/iterator | 8 +- lib/libcxx/include/experimental/memory | 4 +- .../include/experimental/propagate_const | 2 +- lib/libcxx/include/experimental/simd | 2 +- lib/libcxx/include/experimental/type_traits | 10 +- lib/libcxx/include/experimental/utility | 2 +- lib/libcxx/include/ext/__hash | 24 +- lib/libcxx/include/ext/hash_map | 34 +- lib/libcxx/include/ext/hash_set | 12 +- lib/libcxx/include/filesystem | 2 +- lib/libcxx/include/flat_map | 9 + lib/libcxx/include/flat_set | 85 + lib/libcxx/include/format | 2 +- lib/libcxx/include/forward_list | 609 +-- lib/libcxx/include/fstream | 172 +- lib/libcxx/include/functional | 5 + lib/libcxx/include/future | 165 +- lib/libcxx/include/initializer_list | 4 +- lib/libcxx/include/iomanip | 9 +- lib/libcxx/include/ios | 17 +- lib/libcxx/include/iosfwd | 6 +- lib/libcxx/include/istream | 102 +- lib/libcxx/include/iterator | 2 +- lib/libcxx/include/latch | 2 +- lib/libcxx/include/limits | 27 +- lib/libcxx/include/list | 584 +-- lib/libcxx/include/locale | 3494 +---------------- lib/libcxx/include/map | 326 +- lib/libcxx/include/math.h | 11 +- lib/libcxx/include/mdspan | 47 +- lib/libcxx/include/memory | 6 + lib/libcxx/include/memory_resource | 2 +- lib/libcxx/include/mutex | 94 +- lib/libcxx/include/numbers | 2 +- lib/libcxx/include/numeric | 1 + lib/libcxx/include/optional | 298 +- lib/libcxx/include/ostream | 5 + lib/libcxx/include/print | 4 +- lib/libcxx/include/queue | 157 +- lib/libcxx/include/ranges | 12 +- lib/libcxx/include/ratio | 22 +- lib/libcxx/include/regex | 511 +-- lib/libcxx/include/scoped_allocator | 4 +- lib/libcxx/include/semaphore | 2 +- lib/libcxx/include/set | 76 +- lib/libcxx/include/shared_mutex | 177 +- lib/libcxx/include/source_location | 2 +- lib/libcxx/include/span | 6 +- lib/libcxx/include/sstream | 10 +- lib/libcxx/include/stack | 20 +- lib/libcxx/include/stdlib.h | 19 +- lib/libcxx/include/stop_token | 2 +- lib/libcxx/include/streambuf | 63 +- lib/libcxx/include/string | 1538 +++----- lib/libcxx/include/string_view | 11 +- lib/libcxx/include/strstream | 55 +- lib/libcxx/include/syncstream | 17 +- 
lib/libcxx/include/system_error | 1 + lib/libcxx/include/tuple | 43 +- lib/libcxx/include/type_traits | 344 +- lib/libcxx/include/typeindex | 6 +- lib/libcxx/include/typeinfo | 4 +- lib/libcxx/include/unordered_map | 229 +- lib/libcxx/include/unordered_set | 40 +- lib/libcxx/include/utility | 4 + lib/libcxx/include/valarray | 36 +- lib/libcxx/include/variant | 172 +- lib/libcxx/include/vector | 1 + lib/libcxx/include/version | 31 +- lib/libcxx/src/any.cpp | 2 +- lib/libcxx/src/atomic.cpp | 7 +- lib/libcxx/src/call_once.cpp | 1 + lib/libcxx/src/chrono.cpp | 12 +- lib/libcxx/src/condition_variable.cpp | 14 +- .../experimental/log_hardening_failure.cpp | 31 + lib/libcxx/src/experimental/time_zone.cpp | 9 + lib/libcxx/src/experimental/tzdb.cpp | 68 +- .../src/filesystem/directory_iterator.cpp | 1 + lib/libcxx/src/filesystem/error.h | 13 +- .../src/filesystem/filesystem_clock.cpp | 6 +- .../src/filesystem/filesystem_error.cpp | 1 + lib/libcxx/src/filesystem/operations.cpp | 1 + lib/libcxx/src/filesystem/path_parser.h | 2 +- lib/libcxx/src/functional.cpp | 6 +- lib/libcxx/src/future.cpp | 20 +- lib/libcxx/src/hash.cpp | 19 +- lib/libcxx/src/include/overridable_function.h | 31 +- lib/libcxx/src/include/ryu/common.h | 1 + lib/libcxx/src/ios.cpp | 10 +- lib/libcxx/src/iostream.cpp | 165 +- lib/libcxx/src/locale.cpp | 213 +- lib/libcxx/src/memory.cpp | 2 + lib/libcxx/src/memory_resource.cpp | 8 +- lib/libcxx/src/mutex.cpp | 9 +- lib/libcxx/src/new.cpp | 23 +- lib/libcxx/src/optional.cpp | 2 +- lib/libcxx/src/print.cpp | 2 +- lib/libcxx/src/random.cpp | 25 +- lib/libcxx/src/ryu/d2fixed.cpp | 1 + lib/libcxx/src/ryu/d2s.cpp | 1 + lib/libcxx/src/ryu/f2s.cpp | 2 + lib/libcxx/src/std_stream.h | 2 +- lib/libcxx/src/string.cpp | 40 +- lib/libcxx/src/thread.cpp | 6 +- lib/libcxx/src/verbose_abort.cpp | 2 +- 562 files changed, 16867 insertions(+), 12974 deletions(-) create mode 100644 lib/libcxx/include/__algorithm/for_each_n_segment.h create mode 100644 lib/libcxx/include/__algorithm/out_value_result.h create mode 100644 lib/libcxx/include/__chrono/gps_clock.h create mode 100644 lib/libcxx/include/__chrono/tai_clock.h create mode 100644 lib/libcxx/include/__flat_set/flat_multiset.h create mode 100644 lib/libcxx/include/__flat_set/flat_set.h create mode 100644 lib/libcxx/include/__flat_set/ra_iterator.h create mode 100644 lib/libcxx/include/__flat_set/utils.h create mode 100644 lib/libcxx/include/__fwd/map.h create mode 100644 lib/libcxx/include/__fwd/set.h create mode 100644 lib/libcxx/include/__iterator/product_iterator.h create mode 100644 lib/libcxx/include/__locale_dir/check_grouping.h create mode 100644 lib/libcxx/include/__locale_dir/get_c_locale.h create mode 100644 lib/libcxx/include/__locale_dir/messages.h create mode 100644 lib/libcxx/include/__locale_dir/money.h create mode 100644 lib/libcxx/include/__locale_dir/num.h create mode 100644 lib/libcxx/include/__locale_dir/scan_keyword.h create mode 100644 lib/libcxx/include/__locale_dir/support/linux.h create mode 100644 lib/libcxx/include/__locale_dir/time.h create mode 100644 lib/libcxx/include/__locale_dir/wbuffer_convert.h create mode 100644 lib/libcxx/include/__locale_dir/wstring_convert.h create mode 100644 lib/libcxx/include/__log_hardening_failure create mode 100644 lib/libcxx/include/__mdspan/aligned_accessor.h create mode 100644 lib/libcxx/include/__memory/destroy.h create mode 100644 lib/libcxx/include/__memory/is_sufficiently_aligned.h create mode 100644 lib/libcxx/include/__memory/ranges_destroy.h create mode 100644 
lib/libcxx/include/__numeric/ranges_iota.h create mode 100644 lib/libcxx/include/__ranges/join_with_view.h rename lib/libcxx/include/__type_traits/{add_lvalue_reference.h => add_reference.h} (51%) delete mode 100644 lib/libcxx/include/__type_traits/add_rvalue_reference.h create mode 100644 lib/libcxx/include/__type_traits/integer_traits.h delete mode 100644 lib/libcxx/include/__type_traits/is_nothrow_convertible.h create mode 100644 lib/libcxx/include/__type_traits/is_replaceable.h delete mode 100644 lib/libcxx/include/__type_traits/is_signed_integer.h delete mode 100644 lib/libcxx/include/__type_traits/is_unsigned_integer.h create mode 100644 lib/libcxx/include/__type_traits/reference_constructs_from_temporary.h create mode 100644 lib/libcxx/include/__type_traits/reference_converts_from_temporary.h create mode 100644 lib/libcxx/include/__verbose_trap create mode 100644 lib/libcxx/include/flat_set create mode 100644 lib/libcxx/src/experimental/log_hardening_failure.cpp diff --git a/lib/libcxx/include/__algorithm/copy.h b/lib/libcxx/include/__algorithm/copy.h index 962aa90059..ea98031df1 100644 --- a/lib/libcxx/include/__algorithm/copy.h +++ b/lib/libcxx/include/__algorithm/copy.h @@ -13,8 +13,10 @@ #include <__algorithm/for_each_segment.h> #include <__algorithm/min.h> #include <__config> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> +#include <__memory/pointer_traits.h> #include <__type_traits/common_type.h> #include <__type_traits/enable_if.h> #include <__utility/move.h> @@ -29,9 +31,129 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result); + template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter); +template +_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first.__ctz_); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __nw = __n / __bits_per_word; + std::copy(std::__to_address(__first.__seg_), + std::__to_address(__first.__seg_ + __nw), + std::__to_address(__result.__seg_)); + __n -= __nw * __bits_per_word; + __result.__seg_ += __nw; + // do last word + if (__n > 0) { + __first.__seg_ += __nw; + __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(__n); + } + 
} + return __result; +} + +template +_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_); + __storage_type __b = *__first.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); + __m = std::__middle_mask<__storage_type>(__clz_r - __ddn, __result.__ctz_); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) + *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); + else + *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __dn); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = std::__leading_mask<__storage_type>(__result.__ctz_); + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { + __storage_type __b = *__first.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + ++__result.__seg_; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b >> __clz_r; + } + // do last word + if (__n > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + __storage_type __dn = std::min(__n, static_cast(__clz_r)); + __m = std::__middle_mask<__storage_type>(__clz_r - __dn, __result.__ctz_); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __dn; + __result.__ctz_ = static_cast(__n); + } + } + } + return __result; +} + struct __copy_impl { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> @@ -95,6 +217,16 @@ struct __copy_impl { } } + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > + operator()(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) const { + if (__first.__ctz_ == __result.__ctz_) + return std::make_pair(__last, std::__copy_aligned(__first, __last, __result)); + return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result)); + } + // At this point, the iterators have been unwrapped 
so any `contiguous_iterator` has been unwrapped to a pointer. template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> @@ -110,7 +242,7 @@ __copy(_InIter __first, _Sent __last, _OutIter __result) { } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { return std::__copy(__first, __last, __result).second; } diff --git a/lib/libcxx/include/__algorithm/copy_backward.h b/lib/libcxx/include/__algorithm/copy_backward.h index 48a768f577..9f890645a4 100644 --- a/lib/libcxx/include/__algorithm/copy_backward.h +++ b/lib/libcxx/include/__algorithm/copy_backward.h @@ -10,11 +10,14 @@ #define _LIBCPP___ALGORITHM_COPY_BACKWARD_H #include <__algorithm/copy_move_common.h> +#include <__algorithm/copy_n.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> +#include <__memory/pointer_traits.h> #include <__type_traits/common_type.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_constructible.h> @@ -34,6 +37,124 @@ template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter> __copy_backward(_InIter __first, _Sent __last, _OutIter __result); +template +_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_aligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__last.__ctz_ != 0) { + difference_type __dn = std::min(static_cast(__last.__ctz_), __n); + __n -= __dn; + unsigned __clz = __bits_per_word - __last.__ctz_; + __storage_type __m = std::__middle_mask<__storage_type>(__clz, __last.__ctz_ - __dn); + __storage_type __b = *__last.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + // __last.__ctz_ = 0 + } + // __last.__ctz_ == 0 || __n == 0 + // __result.__ctz_ == 0 || __n == 0 + // do middle words + __storage_type __nw = __n / __bits_per_word; + __result.__seg_ -= __nw; + __last.__seg_ -= __nw; + std::copy_n(std::__to_address(__last.__seg_), __nw, std::__to_address(__result.__seg_)); + __n -= __nw * __bits_per_word; + // do last word + if (__n > 0) { + __storage_type __m = std::__leading_mask<__storage_type>(__bits_per_word - __n); + __storage_type __b = *--__last.__seg_ & __m; + *--__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); + } + } + return __result; +} + +template +_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_unaligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { + using _In = __bit_iterator<_Cp, _IsConst>; + using difference_type = typename _In::difference_type; + using __storage_type = typename _In::__storage_type; + + const int __bits_per_word = _In::__bits_per_word; + 
difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__last.__ctz_ != 0) { + difference_type __dn = std::min(static_cast(__last.__ctz_), __n); + __n -= __dn; + unsigned __clz_l = __bits_per_word - __last.__ctz_; + __storage_type __m = std::__middle_mask<__storage_type>(__clz_l, __last.__ctz_ - __dn); + __storage_type __b = *__last.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = std::min(__dn, static_cast(__result.__ctz_)); + if (__ddn > 0) { + __m = std::__middle_mask<__storage_type>(__clz_r, __result.__ctz_ - __ddn); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __last.__ctz_) + *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); + else + *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_); + __result.__ctz_ = static_cast(((-__ddn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + } + if (__dn > 0) { + // __result.__ctz_ == 0 + --__result.__seg_; + __result.__ctz_ = static_cast(-__dn & (__bits_per_word - 1)); + __m = std::__leading_mask<__storage_type>(__result.__ctz_); + *__result.__seg_ &= ~__m; + __last.__ctz_ -= __dn + __ddn; + *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); + } + // __last.__ctz_ = 0 + } + // __last.__ctz_ == 0 || __n == 0 + // __result.__ctz_ != 0 || __n == 0 + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = std::__trailing_mask<__storage_type>(__clz_r); + for (; __n >= __bits_per_word; __n -= __bits_per_word) { + __storage_type __b = *--__last.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __clz_r; + *--__result.__seg_ &= __m; + *__result.__seg_ |= __b << __result.__ctz_; + } + // do last word + if (__n > 0) { + __m = std::__leading_mask<__storage_type>(__bits_per_word - __n); + __storage_type __b = *--__last.__seg_ & __m; + __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __dn = std::min(__n, static_cast(__result.__ctz_)); + __m = std::__middle_mask<__storage_type>(__clz_r, __result.__ctz_ - __dn); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_); + __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) { + // __result.__ctz_ == 0 + --__result.__seg_; + __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); + __m = std::__leading_mask<__storage_type>(__result.__ctz_); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << (__result.__ctz_ - (__bits_per_word - __n - __dn)); + } + } + } + return __result; +} + template struct __copy_backward_impl { template @@ -107,6 +228,16 @@ struct __copy_backward_impl { } } + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > + operator()(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) { + if (__last.__ctz_ == __result.__ctz_) + return std::make_pair(__last, std::__copy_backward_aligned(__first, __last, __result)); + return std::make_pair(__last, std::__copy_backward_unaligned(__first, __last, __result)); + } + // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. 
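
Note on the bit-iterator copy hunks above: __copy_aligned/__copy_backward_aligned and their *_unaligned counterparts copy vector<bool>-style bit ranges word-at-a-time, masking a partial first word, copying whole middle words directly, and masking a partial last word. The following standalone sketch is illustrative only, not the libcxx internals: middle_mask, trailing_mask and copy_bits_aligned are stand-ins for the __middle_mask/__trailing_mask helpers and the aligned case (source and destination start at the same bit offset), assuming 64-bit words.

#include <algorithm>
#include <cstddef>
#include <cstdint>

using word_t = std::uint64_t;
constexpr unsigned bits_per_word = 64;

// Ones everywhere except the top `clz` and the bottom `ctz` bits.
constexpr word_t middle_mask(unsigned clz, unsigned ctz) {
  return (~word_t(0) << ctz) & (~word_t(0) >> clz);
}

// Ones everywhere except the top `clz` bits.
constexpr word_t trailing_mask(unsigned clz) { return ~word_t(0) >> clz; }

// Copy `n` bits from `src` to `dst` when both ranges start `ctz` bits into
// their first word (the "aligned" case sketched here).
void copy_bits_aligned(const word_t* src, word_t* dst, unsigned ctz, std::size_t n) {
  if (n == 0)
    return;
  if (ctz != 0) { // partial first word
    unsigned clz = bits_per_word - ctz;
    std::size_t dn = std::min<std::size_t>(clz, n);
    word_t m = middle_mask(static_cast<unsigned>(clz - dn), ctz);
    *dst = (*dst & ~m) | (*src & m);
    n -= dn;
    ++src;
    ++dst;
  }
  for (; n >= bits_per_word; n -= bits_per_word) // whole middle words
    *dst++ = *src++;
  if (n > 0) { // partial last word
    word_t m = trailing_mask(static_cast<unsigned>(bits_per_word - n));
    *dst = (*dst & ~m) | (*src & m);
  }
}

The unaligned variants additionally shift each source word to line it up with the destination's bit offset, which is what the extra __clz_r/__ddn bookkeeping in __copy_unaligned and __copy_backward_unaligned handles.
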
template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> diff --git a/lib/libcxx/include/__algorithm/count.h b/lib/libcxx/include/__algorithm/count.h index cd9125779e..0cbe9b6e61 100644 --- a/lib/libcxx/include/__algorithm/count.h +++ b/lib/libcxx/include/__algorithm/count.h @@ -55,18 +55,18 @@ __count_bool(__bit_iterator<_Cp, _IsConst> __first, typename __size_difference_t if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = std::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = std::__libcpp_popcount(std::__invert_if(*__first.__seg_) & __m); + __storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_); + __r = std::__popcount(__storage_type(std::__invert_if(*__first.__seg_) & __m)); __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += std::__libcpp_popcount(std::__invert_if(*__first.__seg_)); + __r += std::__popcount(std::__invert_if(*__first.__seg_)); // do last partial word if (__n > 0) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += std::__libcpp_popcount(std::__invert_if(*__first.__seg_) & __m); + __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + __r += std::__popcount(__storage_type(std::__invert_if(*__first.__seg_) & __m)); } return __r; } diff --git a/lib/libcxx/include/__algorithm/equal.h b/lib/libcxx/include/__algorithm/equal.h index a276bb9954..5a8c9504ed 100644 --- a/lib/libcxx/include/__algorithm/equal.h +++ b/lib/libcxx/include/__algorithm/equal.h @@ -11,16 +11,20 @@ #define _LIBCPP___ALGORITHM_EQUAL_H #include <__algorithm/comp.h> +#include <__algorithm/min.h> #include <__algorithm/unwrap_iter.h> #include <__config> #include <__functional/identity.h> +#include <__fwd/bit_reference.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__memory/pointer_traits.h> #include <__string/constexpr_c_functions.h> #include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_equality_comparable.h> +#include <__type_traits/is_same.h> #include <__type_traits/is_volatile.h> #include <__utility/move.h> @@ -33,6 +37,140 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +template +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool +__equal_unaligned(__bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2) { + using _It = __bit_iterator<_Cp, _IsConst1>; + using difference_type = typename _It::difference_type; + using __storage_type = typename _It::__storage_type; + + const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) { + // do first word + if (__first1.__ctz_ != 0) { + unsigned __clz_f = __bits_per_word - __first1.__ctz_; + difference_type __dn = std::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first1.__ctz_); + __storage_type __b = *__first1.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); + __m = std::__middle_mask<__storage_type>(__clz_r - __ddn, __first2.__ctz_); + if (__first2.__ctz_ > 
__first1.__ctz_) { + if (static_cast<__storage_type>(*__first2.__seg_ & __m) != + static_cast<__storage_type>(__b << (__first2.__ctz_ - __first1.__ctz_))) + return false; + } else { + if (static_cast<__storage_type>(*__first2.__seg_ & __m) != + static_cast<__storage_type>(__b >> (__first1.__ctz_ - __first2.__ctz_))) + return false; + } + __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast((__ddn + __first2.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + if (static_cast<__storage_type>(*__first2.__seg_ & __m) != + static_cast<__storage_type>(__b >> (__first1.__ctz_ + __ddn))) + return false; + __first2.__ctz_ = static_cast(__dn); + } + ++__first1.__seg_; + // __first1.__ctz_ = 0; + } + // __first1.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __m = std::__leading_mask<__storage_type>(__first2.__ctz_); + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) { + __storage_type __b = *__first1.__seg_; + if (static_cast<__storage_type>(*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << __first2.__ctz_)) + return false; + ++__first2.__seg_; + if (static_cast<__storage_type>(*__first2.__seg_ & static_cast<__storage_type>(~__m)) != + static_cast<__storage_type>(__b >> __clz_r)) + return false; + } + // do last word + if (__n > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + __storage_type __b = *__first1.__seg_ & __m; + __storage_type __dn = std::min(__n, static_cast(__clz_r)); + __m = std::__middle_mask<__storage_type>(__clz_r - __dn, __first2.__ctz_); + if (static_cast<__storage_type>(*__first2.__seg_ & __m) != static_cast<__storage_type>(__b << __first2.__ctz_)) + return false; + __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast((__dn + __first2.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) { + __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + if (static_cast<__storage_type>(*__first2.__seg_ & __m) != static_cast<__storage_type>(__b >> __dn)) + return false; + } + } + } + return true; +} + +template +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool +__equal_aligned(__bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2) { + using _It = __bit_iterator<_Cp, _IsConst1>; + using difference_type = typename _It::difference_type; + using __storage_type = typename _It::__storage_type; + + const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) { + // do first word + if (__first1.__ctz_ != 0) { + unsigned __clz = __bits_per_word - __first1.__ctz_; + difference_type __dn = std::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = std::__middle_mask<__storage_type>(__clz - __dn, __first1.__ctz_); + if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + return false; + ++__first2.__seg_; + ++__first1.__seg_; + // __first1.__ctz_ = 0; + // __first2.__ctz_ = 0; + } + // __first1.__ctz_ == 0; + // __first2.__ctz_ == 0; + // do middle words + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) + if (*__first2.__seg_ != *__first1.__seg_) + return false; + // do last word + if (__n > 0) { + __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); + if 
((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + return false; + } + } + return true; +} + +template , int> = 0> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( + __bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2, + _BinaryPredicate) { + if (__first1.__ctz_ == __first2.__ctz_) + return std::__equal_aligned(__first1, __last1, __first2); + return std::__equal_unaligned(__first1, __last1, __first2); +} + template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) { @@ -94,6 +232,28 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } +template && __is_identity<_Proj1>::value && + __is_identity<_Proj2>::value, + int> = 0> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( + __bit_iterator<_Cp, _IsConst1> __first1, + __bit_iterator<_Cp, _IsConst1> __last1, + __bit_iterator<_Cp, _IsConst2> __first2, + __bit_iterator<_Cp, _IsConst2>, + _Pred&, + _Proj1&, + _Proj2&) { + if (__first1.__ctz_ == __first2.__ctz_) + return std::__equal_aligned(__first1, __last1, __first2); + return std::__equal_unaligned(__first1, __last1, __first2); +} + template [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, diff --git a/lib/libcxx/include/__algorithm/fill_n.h b/lib/libcxx/include/__algorithm/fill_n.h index a7e01c45b9..0da78e1f38 100644 --- a/lib/libcxx/include/__algorithm/fill_n.h +++ b/lib/libcxx/include/__algorithm/fill_n.h @@ -41,11 +41,7 @@ __fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_typ if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = std::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - if (_FillVal) - *__first.__seg_ |= __m; - else - *__first.__seg_ &= ~__m; + std::__fill_masked_range(std::__to_address(__first.__seg_), __clz_f - __dn, __first.__ctz_, _FillVal); __n -= __dn; ++__first.__seg_; } @@ -56,11 +52,7 @@ __fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_typ // do last partial word if (__n > 0) { __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if (_FillVal) - *__first.__seg_ |= __m; - else - *__first.__seg_ &= ~__m; + std::__fill_masked_range(std::__to_address(__first.__seg_), __bits_per_word - __n, 0u, _FillVal); } } diff --git a/lib/libcxx/include/__algorithm/find.h b/lib/libcxx/include/__algorithm/find.h index 24b8b2f964..a7d9374b3a 100644 --- a/lib/libcxx/include/__algorithm/find.h +++ b/lib/libcxx/include/__algorithm/find.h @@ -106,10 +106,10 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename __size_difference_ty if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = std::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __m = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_); __storage_type __b = 
std::__invert_if(*__first.__seg_) & __m; if (__b) - return _It(__first.__seg_, static_cast(std::__libcpp_ctz(__b))); + return _It(__first.__seg_, static_cast(std::__countr_zero(__b))); if (__n == __dn) return __first + __n; __n -= __dn; @@ -119,14 +119,14 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename __size_difference_ty for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) { __storage_type __b = std::__invert_if(*__first.__seg_); if (__b) - return _It(__first.__seg_, static_cast(std::__libcpp_ctz(__b))); + return _It(__first.__seg_, static_cast(std::__countr_zero(__b))); } // do last partial word if (__n > 0) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n); __storage_type __b = std::__invert_if(*__first.__seg_) & __m; if (__b) - return _It(__first.__seg_, static_cast(std::__libcpp_ctz(__b))); + return _It(__first.__seg_, static_cast(std::__countr_zero(__b))); } return _It(__first.__seg_, static_cast(__n)); } diff --git a/lib/libcxx/include/__algorithm/for_each.h b/lib/libcxx/include/__algorithm/for_each.h index e08f583504..4167eec350 100644 --- a/lib/libcxx/include/__algorithm/for_each.h +++ b/lib/libcxx/include/__algorithm/for_each.h @@ -12,9 +12,10 @@ #include <__algorithm/for_each_segment.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/segmented_iterator.h> -#include <__ranges/movable_box.h> -#include <__utility/in_place.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -26,28 +27,36 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function -for_each(_InputIterator __first, _InputIterator __last, _Function __f) { +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) { for (; __first != __last; ++__first) - __f(*__first); - return __f; + std::__invoke(__f, std::__invoke(__proj, *__first)); + return __first; } -// __movable_box is available in C++20, but is actually a copyable-box, so optimization is only correct in C++23 -#if _LIBCPP_STD_VER >= 23 -template - requires __is_segmented_iterator<_SegmentedIterator>::value -_LIBCPP_HIDE_FROM_ABI constexpr _Function -for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function __func) { - ranges::__movable_box<_Function> __wrapped_func(in_place, std::move(__func)); - std::__for_each_segment(__first, __last, [&](auto __lfirst, auto __llast) { - __wrapped_func = - ranges::__movable_box<_Function>(in_place, std::for_each(__lfirst, __llast, std::move(*__wrapped_func))); +#ifndef _LIBCPP_CXX03_LANG +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator +__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) { + using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; + std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { + std::__for_each(__lfirst, __llast, __func, __proj); }); - return std::move(*__wrapped_func); + return __last; +} +#endif // !_LIBCPP_CXX03_LANG + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func +for_each(_InputIterator __first, _InputIterator __last, _Func __f) { + 
__identity __proj; + std::__for_each(__first, __last, __f, __proj); + return __f; } -#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/for_each_n.h b/lib/libcxx/include/__algorithm/for_each_n.h index fce380b49d..9a6c6bb517 100644 --- a/lib/libcxx/include/__algorithm/for_each_n.h +++ b/lib/libcxx/include/__algorithm/for_each_n.h @@ -10,32 +10,93 @@ #ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_H #define _LIBCPP___ALGORITHM_FOR_EACH_N_H +#include <__algorithm/for_each.h> +#include <__algorithm/for_each_n_segment.h> #include <__config> +#include <__functional/identity.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/segmented_iterator.h> +#include <__type_traits/disjunction.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__type_traits/negation.h> #include <__utility/convert_to_integral.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 17 - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator -for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) { +template ::value && + _Or< _Not<__is_segmented_iterator<_InputIterator> >, + _Not<__has_random_access_local_iterator<_InputIterator> > >::value, + int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; _IntegralSize __n = __orig_n; while (__n > 0) { - __f(*__first); + std::__invoke(__f, std::__invoke(__proj, *__first)); ++__first; --__n; } - return __first; + return std::move(__first); } -#endif +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter +__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) { + typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n; + auto __last = __first + __n; + std::__for_each(__first, __last, __f, __proj); + return __last; +} + +#ifndef _LIBCPP_CXX03_LANG +template ::value && + __is_segmented_iterator<_SegmentedIterator>::value && + __has_random_access_iterator_category< + typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value, + int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator +__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { + using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; + return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { + std::__for_each(__lfirst, __llast, __f, __proj); + }); +} +#endif // !_LIBCPP_CXX03_LANG + +#if _LIBCPP_STD_VER >= 17 + +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +for_each_n(_InputIterator __first, _Size __orig_n, _Func __f) { + __identity __proj; + return std::__for_each_n(__first, __orig_n, __f, __proj); +} + +#endif // _LIBCPP_STD_VER >= 17 _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_FOR_EACH_N_H diff --git a/lib/libcxx/include/__algorithm/for_each_n_segment.h b/lib/libcxx/include/__algorithm/for_each_n_segment.h new file mode 100644 index 0000000000..1b522fb373 --- /dev/null +++ b/lib/libcxx/include/__algorithm/for_each_n_segment.h @@ -0,0 
+1,63 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H +#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__iterator/segmented_iterator.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// __for_each_n_segment optimizes linear iteration over segmented iterators. It processes a segmented +// input range [__first, __first + __n) by applying the functor __func to each element within the segment. +// The return value of __func is ignored, and the function returns an iterator pointing to one past the +// last processed element in the input range. + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator +__for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) { + static_assert(__is_segmented_iterator<_SegmentedIterator>::value && + __has_random_access_iterator_category< + typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value, + "__for_each_n_segment only works with segmented iterators with random-access local iterators"); + if (__orig_n <= 0) + return __first; + + using _Traits = __segmented_iterator_traits<_SegmentedIterator>; + using __local_iter_t = typename _Traits::__local_iterator; + using __difference_t = typename std::iterator_traits<__local_iter_t>::difference_type; + __difference_t __n = __orig_n; + auto __seg = _Traits::__segment(__first); + auto __local_first = _Traits::__local(__first); + __local_iter_t __local_last; + + while (__n > 0) { + __local_last = _Traits::__end(__seg); + auto __seg_size = __local_last - __local_first; + if (__n <= __seg_size) { + __local_last = __local_first + __n; + __func(__local_first, __local_last); + break; + } + __func(__local_first, __local_last); + __n -= __seg_size; + __local_first = _Traits::__begin(++__seg); + } + + return _Traits::__compose(__seg, __local_last); +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H diff --git a/lib/libcxx/include/__algorithm/inplace_merge.h b/lib/libcxx/include/__algorithm/inplace_merge.h index 1fc31b66f4..fbfe89936d 100644 --- a/lib/libcxx/include/__algorithm/inplace_merge.h +++ b/lib/libcxx/include/__algorithm/inplace_merge.h @@ -22,6 +22,7 @@ #include <__functional/identity.h> #include <__iterator/iterator_traits.h> #include <__iterator/reverse_iterator.h> +#include <__memory/construct_at.h> #include <__memory/destruct_n.h> #include <__memory/unique_ptr.h> #include <__memory/unique_temporary_buffer.h> @@ -106,13 +107,13 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __buffered_inplace_merg value_type* __p = __buff; for (_BidirectionalIterator __i = __first; __i != __middle; __d.template __incr(), (void)++__i, (void)++__p) - ::new ((void*)__p) value_type(_IterOps<_AlgPolicy>::__iter_move(__i)); + std::__construct_at(__p, _IterOps<_AlgPolicy>::__iter_move(__i)); std::__half_inplace_merge<_AlgPolicy>(__buff, __p, __middle, __last, __first, __comp); } else { value_type* __p = __buff; for (_BidirectionalIterator __i = __middle; __i != __last; __d.template __incr(), 
(void)++__i, (void)++__p) - ::new ((void*)__p) value_type(_IterOps<_AlgPolicy>::__iter_move(__i)); + std::__construct_at(__p, _IterOps<_AlgPolicy>::__iter_move(__i)); typedef reverse_iterator<_BidirectionalIterator> _RBi; typedef reverse_iterator _Rv; typedef __invert<_Compare> _Inverted; @@ -203,7 +204,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __inplace_merge( } template -_LIBCPP_HIDE_FROM_ABI void __inplace_merge( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __inplace_merge( _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare&& __comp) { typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type; @@ -223,14 +224,14 @@ _LIBCPP_HIDE_FROM_ABI void __inplace_merge( } template -inline _LIBCPP_HIDE_FROM_ABI void inplace_merge( +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void inplace_merge( _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare __comp) { std::__inplace_merge<_ClassicAlgPolicy>( std::move(__first), std::move(__middle), std::move(__last), static_cast<__comp_ref_type<_Compare> >(__comp)); } template -inline _LIBCPP_HIDE_FROM_ABI void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last) { std::inplace_merge(std::move(__first), std::move(__middle), std::move(__last), __less<>()); } diff --git a/lib/libcxx/include/__algorithm/min_element.h b/lib/libcxx/include/__algorithm/min_element.h index db996365bf..fdab63aefe 100644 --- a/lib/libcxx/include/__algorithm/min_element.h +++ b/lib/libcxx/include/__algorithm/min_element.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter -__min_element(_Iter __first, _Sent __last, _Comp __comp, _Proj& __proj) { +__min_element(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { if (__first == __last) return __first; diff --git a/lib/libcxx/include/__algorithm/move.h b/lib/libcxx/include/__algorithm/move.h index 6f3b0eb5d2..a3320e9f19 100644 --- a/lib/libcxx/include/__algorithm/move.h +++ b/lib/libcxx/include/__algorithm/move.h @@ -9,11 +9,13 @@ #ifndef _LIBCPP___ALGORITHM_MOVE_H #define _LIBCPP___ALGORITHM_MOVE_H +#include <__algorithm/copy.h> #include <__algorithm/copy_move_common.h> #include <__algorithm/for_each_segment.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> @@ -98,6 +100,14 @@ struct __move_impl { } } + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > + operator()(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) { + return std::__copy(__first, __last, __result); + } + // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. 
template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> diff --git a/lib/libcxx/include/__algorithm/move_backward.h b/lib/libcxx/include/__algorithm/move_backward.h index 24a8d9b245..14482fee18 100644 --- a/lib/libcxx/include/__algorithm/move_backward.h +++ b/lib/libcxx/include/__algorithm/move_backward.h @@ -9,10 +9,12 @@ #ifndef _LIBCPP___ALGORITHM_MOVE_BACKWARD_H #define _LIBCPP___ALGORITHM_MOVE_BACKWARD_H +#include <__algorithm/copy_backward.h> #include <__algorithm/copy_move_common.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> @@ -107,6 +109,14 @@ struct __move_backward_impl { } } + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> > + operator()(__bit_iterator<_Cp, _IsConst> __first, + __bit_iterator<_Cp, _IsConst> __last, + __bit_iterator<_Cp, false> __result) { + return std::__copy_backward<_ClassicAlgPolicy>(__first, __last, __result); + } + // At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer. template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*> diff --git a/lib/libcxx/include/__algorithm/out_value_result.h b/lib/libcxx/include/__algorithm/out_value_result.h new file mode 100644 index 0000000000..9e1e0e0728 --- /dev/null +++ b/lib/libcxx/include/__algorithm/out_value_result.h @@ -0,0 +1,56 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
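
As a side note on the move.h and move_backward.h hunks above: the new __bit_iterator overloads simply forward to the corresponding copy routines, because a single bit has no move-only state to transfer. A minimal illustration of the observable behaviour in plain standard C++ (this is a sketch of the idea, not the library internals):

#include <algorithm>
#include <vector>

int main() {
  std::vector<bool> src = {true, false, true, true};
  std::vector<bool> dst(src.size());
  // For vector<bool> iterators, std::move over a range degenerates to a
  // bitwise copy of the bits; the source bits are left unchanged.
  std::move(src.begin(), src.end(), dst.begin());
  return (dst == src) ? 0 : 1;
}
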
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_OUT_VALUE_RESULT_H +#define _LIBCPP___ALGORITHM_OUT_VALUE_RESULT_H + +#include <__concepts/convertible_to.h> +#include <__config> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +namespace ranges { + +template +struct out_value_result { + _LIBCPP_NO_UNIQUE_ADDRESS _OutIter1 out; + _LIBCPP_NO_UNIQUE_ADDRESS _ValType1 value; + + template + requires convertible_to && convertible_to + _LIBCPP_HIDE_FROM_ABI constexpr operator out_value_result<_OutIter2, _ValType2>() const& { + return {out, value}; + } + + template + requires convertible_to<_OutIter1, _OutIter2> && convertible_to<_ValType1, _ValType2> + _LIBCPP_HIDE_FROM_ABI constexpr operator out_value_result<_OutIter2, _ValType2>() && { + return {std::move(out), std::move(value)}; + } +}; + +} // namespace ranges + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_OUT_VALUE_RESULT_H diff --git a/lib/libcxx/include/__algorithm/radix_sort.h b/lib/libcxx/include/__algorithm/radix_sort.h index de6927995e..055d8a0765 100644 --- a/lib/libcxx/include/__algorithm/radix_sort.h +++ b/lib/libcxx/include/__algorithm/radix_sort.h @@ -29,10 +29,12 @@ #include <__algorithm/for_each.h> #include <__algorithm/move.h> +#include <__bit/bit_cast.h> #include <__bit/bit_log2.h> -#include <__bit/countl.h> #include <__config> +#include <__cstddef/size_t.h> #include <__functional/identity.h> +#include <__iterator/access.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/move_iterator.h> @@ -43,9 +45,12 @@ #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_assignable.h> +#include <__type_traits/is_enum.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_unsigned.h> #include <__type_traits/make_unsigned.h> +#include <__type_traits/void_t.h> +#include <__utility/declval.h> #include <__utility/forward.h> #include <__utility/integer_sequence.h> #include <__utility/move.h> @@ -67,7 +72,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 14 template -_LIBCPP_HIDE_FROM_ABI pair<_OutputIterator, __iter_value_type<_InputIterator>> +_LIBCPP_HIDE_FROM_ABI constexpr pair<_OutputIterator, __iter_value_type<_InputIterator>> __partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { if (__first == __last) return {__result, 0}; @@ -109,7 +114,7 @@ struct __counting_sort_traits { }; template -_LIBCPP_HIDE_FROM_ABI auto __nth_radix(size_t __radix_number, _Radix __radix, _Integer __n) { +_LIBCPP_HIDE_FROM_ABI constexpr auto __nth_radix(size_t __radix_number, _Radix __radix, _Integer __n) { static_assert(is_unsigned<_Integer>::value); using __traits = __counting_sort_traits<_Integer, _Radix>; @@ -117,7 +122,7 @@ _LIBCPP_HIDE_FROM_ABI auto __nth_radix(size_t __radix_number, _Radix __radix, _I } template -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI constexpr void __collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) { using __value_type = __iter_value_type<_ForwardIterator>; using __traits = __counting_sort_traits<__value_type, _Map>; @@ -129,7 +134,7 @@ 
__collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _Random } template -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI constexpr void __dispose(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator1 __result, @@ -147,7 +152,7 @@ template -_LIBCPP_HIDE_FROM_ABI bool __collect_impl( +_LIBCPP_HIDE_FROM_ABI constexpr bool __collect_impl( _ForwardIterator __first, _ForwardIterator __last, _Map __map, @@ -177,7 +182,7 @@ _LIBCPP_HIDE_FROM_ABI bool __collect_impl( } template -_LIBCPP_HIDE_FROM_ABI bool +_LIBCPP_HIDE_FROM_ABI constexpr bool __collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, @@ -191,7 +196,7 @@ __collect(_ForwardIterator __first, } template -_LIBCPP_HIDE_FROM_ABI void __dispose_backward( +_LIBCPP_HIDE_FROM_ABI constexpr void __dispose_backward( _BidirectionalIterator __first, _BidirectionalIterator __last, _RandomAccessIterator1 __result, @@ -206,7 +211,7 @@ _LIBCPP_HIDE_FROM_ABI void __dispose_backward( } template -_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator +_LIBCPP_HIDE_FROM_ABI constexpr _RandomAccessIterator __counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) { using __value_type = __iter_value_type<_ForwardIterator>; using __traits = __counting_sort_traits<__value_type, _Map>; @@ -225,7 +230,7 @@ template , _Map, _Radix>::__radix_count == 1, int> = 0> -_LIBCPP_HIDE_FROM_ABI void __radix_sort_impl( +_LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl( _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer, @@ -245,7 +250,7 @@ template < class _Radix, enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count % 2 == 0, int> = 0 > -_LIBCPP_HIDE_FROM_ABI void __radix_sort_impl( +_LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort_impl( _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer_begin, @@ -297,6 +302,96 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) { return static_cast >(__n ^ __min_value); } +template +struct __unsigned_integer_of_size; + +template <> +struct __unsigned_integer_of_size<1> { + using type _LIBCPP_NODEBUG = uint8_t; +}; + +template <> +struct __unsigned_integer_of_size<2> { + using type _LIBCPP_NODEBUG = uint16_t; +}; + +template <> +struct __unsigned_integer_of_size<4> { + using type _LIBCPP_NODEBUG = uint32_t; +}; + +template <> +struct __unsigned_integer_of_size<8> { + using type _LIBCPP_NODEBUG = uint64_t; +}; + +# if _LIBCPP_HAS_INT128 +template <> +struct __unsigned_integer_of_size<16> { + using type _LIBCPP_NODEBUG = unsigned __int128; +}; +# endif + +template +using __unsigned_integer_of_size_t _LIBCPP_NODEBUG = typename __unsigned_integer_of_size<_Size>::type; + +template +using __unsigned_representation_for_t _LIBCPP_NODEBUG = __unsigned_integer_of_size_t; + +// The function `__to_ordered_integral` is defined for integers and IEEE 754 floating-point numbers. +// Returns an integer representation such that for any `x` and `y` such that `x < y`, the expression +// `__to_ordered_integral(x) < __to_ordered_integral(y)` is true, where `x`, `y` are integers or IEEE 754 floats. +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Integral __n) { + return __n; +} + +// An overload for IEEE 754 floating-point numbers + +// For the floats conforming to IEEE 754 (IEC 559) standard, we know that: +// 1. 
The bit representation of positive floats directly reflects their order: +// When comparing floats by magnitude, the number with the larger exponent is greater, and if the exponents are +// equal, the one with the larger mantissa is greater. +// 2. The bit representation of negative floats reflects their reverse order (for the same reasons). +// 3. The most significant bit (sign bit) is zero for positive floats and one for negative floats. Therefore, in the raw +// bit representation, any negative number will be greater than any positive number. + +// The only exception from this rule is `NaN`, which is unordered by definition. + +// Based on the above, to obtain correctly ordered integral representation of floating-point numbers, we need to: +// 1. Invert the bit representation (including the sign bit) of negative floats to switch from reverse order to direct +// order; +// 2. Invert the sign bit for positive floats. + +// Thus, in final integral representation, we have reversed the order for negative floats and made all negative floats +// smaller than all positive numbers (by inverting the sign bit). +template ::is_iec559, int> = 0> +_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Floating __f) { + using __integral_type = __unsigned_representation_for_t<_Floating>; + constexpr auto __bit_count = std::numeric_limits<__integral_type>::digits; + constexpr auto __sign_bit_mask = static_cast<__integral_type>(__integral_type{1} << (__bit_count - 1)); + + const auto __u = std::__bit_cast<__integral_type>(__f); + + return static_cast<__integral_type>(__u & __sign_bit_mask ? ~__u : __u ^ __sign_bit_mask); +} + +// There may exist user-defined comparison for enum, so we cannot compare enums just like integers. +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(_Enum __e) = delete; + +// `long double` varies significantly across platforms and compilers, making it practically +// impossible to determine its actual bit width for conversion to an ordered integer. 
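// A standalone sketch of the mapping described above, specialized to 32-bit
// IEC 559 floats (illustrative only; `to_ordered` is a hypothetical name for
// what __to_ordered_integral does, written with C++20 std::bit_cast):
#include <bit>
#include <cstdint>

constexpr std::uint32_t to_ordered(float f) {
  const std::uint32_t u        = std::bit_cast<std::uint32_t>(f);
  const std::uint32_t sign_bit = std::uint32_t{1} << 31;
  // Negative floats: invert every bit, reversing their order and placing them
  // below all positives. Positive floats: flip only the sign bit.
  return (u & sign_bit) ? ~u : (u ^ sign_bit);
}

static_assert(to_ordered(-2.0f) < to_ordered(-1.0f));
static_assert(to_ordered(-1.0f) < to_ordered(-0.0f));
static_assert(to_ordered(-0.0f) < to_ordered(0.0f)); // -0.0 lands just below +0.0
static_assert(to_ordered(0.0f)  < to_ordered(1.0f));
static_assert(to_ordered(1.0f)  < to_ordered(2.0f));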
+inline _LIBCPP_HIDE_FROM_ABI constexpr auto __to_ordered_integral(long double) = delete; + +template +inline const bool __is_ordered_integer_representable_v = false; + +template +inline const bool + __is_ordered_integer_representable_v<_Tp, __void_t()))>> = + true; + struct __low_byte_fn { template _LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const { @@ -307,18 +402,20 @@ struct __low_byte_fn { }; template -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer, _Map __map, _Radix __radix) { - auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); }; + auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { + return std::__shift_to_unsigned(__map(std::__to_ordered_integral(__x))); + }; std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix); } template -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI constexpr void __radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer) { std::__radix_sort(__first, __last, __buffer, __identity{}, __low_byte_fn{}); } diff --git a/lib/libcxx/include/__algorithm/ranges_for_each.h b/lib/libcxx/include/__algorithm/ranges_for_each.h index de39bc5522..e9c84e8583 100644 --- a/lib/libcxx/include/__algorithm/ranges_for_each.h +++ b/lib/libcxx/include/__algorithm/ranges_for_each.h @@ -9,10 +9,12 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H +#include <__algorithm/for_each.h> +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> +#include <__concepts/assignable.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/projected.h> #include <__ranges/access.h> @@ -41,9 +43,17 @@ private: template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> __for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) { - for (; __first != __last; ++__first) - std::invoke(__func, std::invoke(__proj, *__first)); - return {std::move(__first), std::move(__func)}; + // In the case where we have different iterator and sentinel types, the segmented iterator optimization + // in std::for_each will not kick in. Therefore, we prefer std::for_each_n in that case (whenever we can + // obtain the `n`). 
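// The condition used in the dispatch above, evaluated for two concrete
// iterator/sentinel pairs (a sketch using the public concepts; the variable
// template name is hypothetical):
#include <concepts>
#include <iterator>
#include <vector>

template <class It, class Sent>
constexpr bool uses_counted_path =
    !std::assignable_from<It&, Sent> && std::sized_sentinel_for<Sent, It>;

// A vector's common range: iterator and sentinel are the same type, so the
// plain __for_each path is used.
static_assert(!uses_counted_path<std::vector<int>::iterator, std::vector<int>::iterator>);
// counted_iterator with default_sentinel: different types, but the distance is
// known in O(1), so the __for_each_n path is taken.
static_assert(uses_counted_path<std::counted_iterator<int*>, std::default_sentinel_t>);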
+ if constexpr (!std::assignable_from<_Iter&, _Sent> && std::sized_sentinel_for<_Sent, _Iter>) { + auto __n = __last - __first; + auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj); + return {std::move(__end), std::move(__func)}; + } else { + auto __end = std::__for_each(std::move(__first), std::move(__last), __func, __proj); + return {std::move(__end), std::move(__func)}; + } } public: diff --git a/lib/libcxx/include/__algorithm/ranges_for_each_n.h b/lib/libcxx/include/__algorithm/ranges_for_each_n.h index 603cb72323..3aab1b79c1 100644 --- a/lib/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/lib/libcxx/include/__algorithm/ranges_for_each_n.h @@ -9,10 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> @@ -40,11 +40,8 @@ struct __for_each_n { template > _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { - while (__count-- > 0) { - std::invoke(__func, std::invoke(__proj, *__first)); - ++__first; - } - return {std::move(__first), std::move(__func)}; + auto __last = std::__for_each_n(std::move(__first), __count, __func, __proj); + return {std::move(__last), std::move(__func)}; } }; diff --git a/lib/libcxx/include/__algorithm/ranges_inplace_merge.h b/lib/libcxx/include/__algorithm/ranges_inplace_merge.h index 5879d0e7ef..54581aff39 100644 --- a/lib/libcxx/include/__algorithm/ranges_inplace_merge.h +++ b/lib/libcxx/include/__algorithm/ranges_inplace_merge.h @@ -41,7 +41,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { struct __inplace_merge { template - _LIBCPP_HIDE_FROM_ABI static constexpr auto + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX26 auto __inplace_merge_impl(_Iter __first, _Iter __middle, _Sent __last, _Comp&& __comp, _Proj&& __proj) { auto __last_iter = ranges::next(__middle, __last); std::__inplace_merge<_RangeAlgPolicy>( @@ -51,7 +51,7 @@ struct __inplace_merge { template _Sent, class _Comp = ranges::less, class _Proj = identity> requires sortable<_Iter, _Comp, _Proj> - _LIBCPP_HIDE_FROM_ABI _Iter + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 _Iter operator()(_Iter __first, _Iter __middle, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return __inplace_merge_impl( std::move(__first), std::move(__middle), std::move(__last), std::move(__comp), std::move(__proj)); @@ -59,7 +59,7 @@ struct __inplace_merge { template requires sortable, _Comp, _Proj> - _LIBCPP_HIDE_FROM_ABI borrowed_iterator_t<_Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 borrowed_iterator_t<_Range> operator()(_Range&& __range, iterator_t<_Range> __middle, _Comp __comp = {}, _Proj __proj = {}) const { return __inplace_merge_impl( ranges::begin(__range), std::move(__middle), ranges::end(__range), std::move(__comp), std::move(__proj)); diff --git a/lib/libcxx/include/__algorithm/ranges_iterator_concept.h b/lib/libcxx/include/__algorithm/ranges_iterator_concept.h index 58790e95aa..947dfa4811 100644 --- a/lib/libcxx/include/__algorithm/ranges_iterator_concept.h +++ b/lib/libcxx/include/__algorithm/ranges_iterator_concept.h @@ -44,7 +44,7 @@ consteval auto __get_iterator_concept() { } template 
-using __iterator_concept _LIBCPP_NODEBUG = decltype(__get_iterator_concept<_Iter>()); +using __iterator_concept _LIBCPP_NODEBUG = decltype(ranges::__get_iterator_concept<_Iter>()); } // namespace ranges _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/ranges_max.h b/lib/libcxx/include/__algorithm/ranges_max.h index f631344422..a8fe13a734 100644 --- a/lib/libcxx/include/__algorithm/ranges_max.h +++ b/lib/libcxx/include/__algorithm/ranges_max.h @@ -9,7 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_MAX_H #define _LIBCPP___ALGORITHM_RANGES_MAX_H -#include <__algorithm/ranges_min_element.h> +#include <__algorithm/min_element.h> #include <__assert> #include <__concepts/copyable.h> #include <__config> @@ -57,7 +57,7 @@ struct __max { __il.begin() != __il.end(), "initializer_list must contain at least one element"); auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) -> bool { return std::invoke(__comp, __rhs, __lhs); }; - return *ranges::__min_element_impl(__il.begin(), __il.end(), __comp_lhs_rhs_swapped, __proj); + return *std::__min_element(__il.begin(), __il.end(), __comp_lhs_rhs_swapped, __proj); } template bool { return std::invoke(__comp, __rhs, __lhs); }; - return *ranges::__min_element_impl(std::move(__first), std::move(__last), __comp_lhs_rhs_swapped, __proj); + return *std::__min_element(std::move(__first), std::move(__last), __comp_lhs_rhs_swapped, __proj); } else { range_value_t<_Rp> __result = *__first; while (++__first != __last) { diff --git a/lib/libcxx/include/__algorithm/ranges_max_element.h b/lib/libcxx/include/__algorithm/ranges_max_element.h index 869f71ecc8..db6d5f6b9c 100644 --- a/lib/libcxx/include/__algorithm/ranges_max_element.h +++ b/lib/libcxx/include/__algorithm/ranges_max_element.h @@ -9,7 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_MAX_ELEMENT_H #define _LIBCPP___ALGORITHM_RANGES_MAX_ELEMENT_H -#include <__algorithm/ranges_min_element.h> +#include <__algorithm/min_element.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -40,7 +40,7 @@ struct __max_element { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) -> bool { return std::invoke(__comp, __rhs, __lhs); }; - return ranges::__min_element_impl(__first, __last, __comp_lhs_rhs_swapped, __proj); + return std::__min_element(__first, __last, __comp_lhs_rhs_swapped, __proj); } template operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) -> bool { return std::invoke(__comp, __rhs, __lhs); }; - return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp_lhs_rhs_swapped, __proj); + return std::__min_element(ranges::begin(__r), ranges::end(__r), __comp_lhs_rhs_swapped, __proj); } }; diff --git a/lib/libcxx/include/__algorithm/ranges_min.h b/lib/libcxx/include/__algorithm/ranges_min.h index 302b5d7975..9f1c78eaa9 100644 --- a/lib/libcxx/include/__algorithm/ranges_min.h +++ b/lib/libcxx/include/__algorithm/ranges_min.h @@ -9,7 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_MIN_H #define _LIBCPP___ALGORITHM_RANGES_MIN_H -#include <__algorithm/ranges_min_element.h> +#include <__algorithm/min_element.h> #include <__assert> #include <__concepts/copyable.h> #include <__config> @@ -54,7 +54,7 @@ struct __min { operator()(initializer_list<_Tp> __il, _Comp __comp = {}, _Proj __proj = {}) const { 
_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( __il.begin() != __il.end(), "initializer_list must contain at least one element"); - return *ranges::__min_element_impl(__il.begin(), __il.end(), __comp, __proj); + return *std::__min_element(__il.begin(), __il.end(), __comp, __proj); } template && !__is_cheap_to_copy>) { - return *ranges::__min_element_impl(__first, __last, __comp, __proj); + return *std::__min_element(__first, __last, __comp, __proj); } else { range_value_t<_Rp> __result = *__first; while (++__first != __last) { diff --git a/lib/libcxx/include/__algorithm/ranges_min_element.h b/lib/libcxx/include/__algorithm/ranges_min_element.h index fb92ae56bc..5deb409ccd 100644 --- a/lib/libcxx/include/__algorithm/ranges_min_element.h +++ b/lib/libcxx/include/__algorithm/ranges_min_element.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_MIN_ELEMENT_H #define _LIBCPP___ALGORITHM_RANGES_MIN_ELEMENT_H +#include <__algorithm/min_element.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -32,20 +33,6 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { - -// TODO(ranges): `ranges::min_element` can now simply delegate to `std::__min_element`. -template -_LIBCPP_HIDE_FROM_ABI constexpr _Ip __min_element_impl(_Ip __first, _Sp __last, _Comp& __comp, _Proj& __proj) { - if (__first == __last) - return __first; - - _Ip __i = __first; - while (++__i != __last) - if (std::invoke(__comp, std::invoke(__proj, *__i), std::invoke(__proj, *__first))) - __first = __i; - return __first; -} - struct __min_element { template _Sp, @@ -53,7 +40,7 @@ struct __min_element { indirect_strict_weak_order> _Comp = ranges::less> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { - return ranges::__min_element_impl(__first, __last, __comp, __proj); + return std::__min_element(__first, __last, __comp, __proj); } template , _Proj>> _Comp = ranges::less> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { - return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); + return std::__min_element(ranges::begin(__r), ranges::end(__r), __comp, __proj); } }; diff --git a/lib/libcxx/include/__algorithm/ranges_stable_partition.h b/lib/libcxx/include/__algorithm/ranges_stable_partition.h index cfc02e1e97..d8cfc8d941 100644 --- a/lib/libcxx/include/__algorithm/ranges_stable_partition.h +++ b/lib/libcxx/include/__algorithm/ranges_stable_partition.h @@ -44,7 +44,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { struct __stable_partition { template - _LIBCPP_HIDE_FROM_ABI static subrange<__remove_cvref_t<_Iter>> + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX26 subrange<__remove_cvref_t<_Iter>> __stable_partition_fn_impl(_Iter&& __first, _Sent&& __last, _Pred&& __pred, _Proj&& __proj) { auto __last_iter = ranges::next(__first, __last); @@ -60,7 +60,8 @@ struct __stable_partition { class _Proj = identity, indirect_unary_predicate> _Pred> requires permutable<_Iter> - _LIBCPP_HIDE_FROM_ABI subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 subrange<_Iter> + operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { return __stable_partition_fn_impl(__first, __last, __pred, __proj); } @@ -68,7 +69,7 @@ struct __stable_partition { class _Proj = identity, 
indirect_unary_predicate, _Proj>> _Pred> requires permutable> - _LIBCPP_HIDE_FROM_ABI borrowed_subrange_t<_Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __stable_partition_fn_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/lib/libcxx/include/__algorithm/ranges_stable_sort.h b/lib/libcxx/include/__algorithm/ranges_stable_sort.h index 9c7df80ae9..6e17d0d0c7 100644 --- a/lib/libcxx/include/__algorithm/ranges_stable_sort.h +++ b/lib/libcxx/include/__algorithm/ranges_stable_sort.h @@ -41,7 +41,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { struct __stable_sort { template - _LIBCPP_HIDE_FROM_ABI static _Iter __stable_sort_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR_SINCE_CXX26 _Iter + __stable_sort_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { auto __last_iter = ranges::next(__first, __last); auto&& __projected_comp = std::__make_projected(__comp, __proj); @@ -52,13 +53,14 @@ struct __stable_sort { template _Sent, class _Comp = ranges::less, class _Proj = identity> requires sortable<_Iter, _Comp, _Proj> - _LIBCPP_HIDE_FROM_ABI _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 _Iter + operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return __stable_sort_fn_impl(std::move(__first), std::move(__last), __comp, __proj); } template requires sortable, _Comp, _Proj> - _LIBCPP_HIDE_FROM_ABI borrowed_iterator_t<_Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 borrowed_iterator_t<_Range> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const { return __stable_sort_fn_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); } diff --git a/lib/libcxx/include/__algorithm/rotate.h b/lib/libcxx/include/__algorithm/rotate.h index df4ca95aac..c676980f0c 100644 --- a/lib/libcxx/include/__algorithm/rotate.h +++ b/lib/libcxx/include/__algorithm/rotate.h @@ -9,12 +9,19 @@ #ifndef _LIBCPP___ALGORITHM_ROTATE_H #define _LIBCPP___ALGORITHM_ROTATE_H +#include <__algorithm/copy.h> +#include <__algorithm/copy_backward.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/move.h> #include <__algorithm/move_backward.h> #include <__algorithm/swap_ranges.h> #include <__config> +#include <__cstddef/size_t.h> +#include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> +#include <__memory/construct_at.h> +#include <__memory/pointer_traits.h> +#include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_trivially_assignable.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -185,6 +192,44 @@ __rotate(_Iterator __first, _Iterator __middle, _Sentinel __last) { return _Ret(std::move(__result), std::move(__last_iter)); } +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, false>, __bit_iterator<_Cp, false> > +__rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) { + using _I1 = __bit_iterator<_Cp, false>; + using difference_type = typename _I1::difference_type; + difference_type __d1 = __middle - __first; + difference_type __d2 = __last - __middle; + _I1 __r = __first + __d2; + while (__d1 != 0 && __d2 != 0) { + if (__d1 <= __d2) { + if (__d1 <= 
__bit_array<_Cp>::capacity()) { + __bit_array<_Cp> __b(__d1); + std::copy(__first, __middle, __b.begin()); + std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first)); + break; + } else { + __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle); + __first = __middle; + __middle = __mp; + __d2 -= __d1; + } + } else { + if (__d2 <= __bit_array<_Cp>::capacity()) { + __bit_array<_Cp> __b(__d2); + std::copy(__middle, __last, __b.begin()); + std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last)); + break; + } else { + __bit_iterator<_Cp, false> __mp = __first + __d2; + std::swap_ranges(__first, __mp, __middle); + __first = __mp; + __d1 -= __d2; + } + } + } + return std::make_pair(__r, __last); +} + template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { diff --git a/lib/libcxx/include/__algorithm/simd_utils.h b/lib/libcxx/include/__algorithm/simd_utils.h index 4e03723a32..47942a09e6 100644 --- a/lib/libcxx/include/__algorithm/simd_utils.h +++ b/lib/libcxx/include/__algorithm/simd_utils.h @@ -15,8 +15,6 @@ #include <__bit/countr.h> #include <__config> #include <__cstddef/size_t.h> -#include <__type_traits/is_arithmetic.h> -#include <__type_traits/is_same.h> #include <__utility/integer_sequence.h> #include @@ -28,7 +26,9 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> // TODO: Find out how altivec changes things and allow vectorizations there too. -#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && !defined(__ALTIVEC__) +// TODO: Simplify this condition once we stop building with AppleClang 15 in the CI. +#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(__ALTIVEC__) && \ + !(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1600) # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1 #else # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0 @@ -53,20 +53,20 @@ struct __get_as_integer_type_impl; template <> struct __get_as_integer_type_impl<1> { - using type = uint8_t; + using type _LIBCPP_NODEBUG = uint8_t; }; template <> struct __get_as_integer_type_impl<2> { - using type = uint16_t; + using type _LIBCPP_NODEBUG = uint16_t; }; template <> struct __get_as_integer_type_impl<4> { - using type = uint32_t; + using type _LIBCPP_NODEBUG = uint32_t; }; template <> struct __get_as_integer_type_impl<8> { - using type = uint64_t; + using type _LIBCPP_NODEBUG = uint64_t; }; template @@ -78,7 +78,7 @@ using __get_as_integer_type_t _LIBCPP_NODEBUG = typename __get_as_integer_type_i # if defined(__AVX__) || defined(__MVS__) template inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp); -# elif defined(__SSE__) || defined(__ARM_NEON__) +# elif defined(__SSE__) || defined(__ARM_NEON) template inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp); # elif defined(__MMX__) diff --git a/lib/libcxx/include/__algorithm/sort.h b/lib/libcxx/include/__algorithm/sort.h index 8dd0721f2c..06cb5b8ce7 100644 --- a/lib/libcxx/include/__algorithm/sort.h +++ b/lib/libcxx/include/__algorithm/sort.h @@ -17,6 +17,7 @@ #include <__algorithm/partial_sort.h> #include <__algorithm/unwrap_iter.h> #include <__assert> +#include <__bit/bit_log2.h> #include <__bit/blsr.h> #include <__bit/countl.h> #include <__bit/countr.h> @@ -34,7 +35,7 @@ #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_same.h> #include <__type_traits/is_trivially_copyable.h> -#include 
<__type_traits/remove_cvref.h> +#include <__type_traits/make_unsigned.h> #include <__utility/move.h> #include <__utility/pair.h> #include @@ -52,8 +53,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ::value_type> inline const bool __use_branchless_sort = __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value && - (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> || - __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>); + (__desugars_to_v<__less_tag, _Compare, _Tp, _Tp> || __desugars_to_v<__greater_tag, _Compare, _Tp, _Tp>); namespace __detail { @@ -359,10 +359,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos( // Swap one pair on each iteration as long as both bitsets have at least one // element for swapping. while (__left_bitset != 0 && __right_bitset != 0) { - difference_type __tz_left = __libcpp_ctz(__left_bitset); - __left_bitset = __libcpp_blsr(__left_bitset); - difference_type __tz_right = __libcpp_ctz(__right_bitset); - __right_bitset = __libcpp_blsr(__right_bitset); + difference_type __tz_left = std::__countr_zero(__left_bitset); + __left_bitset = std::__libcpp_blsr(__left_bitset); + difference_type __tz_right = std::__countr_zero(__right_bitset); + __right_bitset = std::__libcpp_blsr(__right_bitset); _Ops::iter_swap(__first + __tz_left, __last - __tz_right); } } @@ -458,7 +458,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within( // Swap within the left side. Need to find set positions in the reverse // order. while (__left_bitset != 0) { - difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset); + difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset); __left_bitset &= (static_cast(1) << __tz_left) - 1; _RandomAccessIterator __it = __first + __tz_left; if (__it != __lm1) { @@ -471,7 +471,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within( // Swap within the right side. Need to find set positions in the reverse // order. 
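// The loops above walk the set bits of a word: countr_zero (or countl_zero for
// the reverse direction) finds a set position, and blsr (x & (x - 1)) clears
// the lowest one. The same idiom via the public <bit> API (illustrative;
// for_each_set_bit is a hypothetical helper):
#include <bit>
#include <cstdint>

template <class Fn>
constexpr void for_each_set_bit(std::uint64_t bits, Fn fn) {
  while (bits != 0) {
    fn(std::countr_zero(bits)); // index of the lowest set bit
    bits &= bits - 1;           // clear that bit (what __libcpp_blsr does)
  }
}
// for_each_set_bit(0b101001u, fn) invokes fn(0), fn(3), fn(5).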
while (__right_bitset != 0) { - difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset); + difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset); __right_bitset &= (static_cast(1) << __tz_right) - 1; _RandomAccessIterator __it = __lm1 - __tz_right; if (__it != __first) { @@ -828,25 +828,6 @@ void __introsort(_RandomAccessIterator __first, } } -template -inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) { - if (__n == 0) - return 0; - if (sizeof(__n) <= sizeof(unsigned)) - return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast(__n)); - if (sizeof(__n) <= sizeof(unsigned long)) - return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast(__n)); - if (sizeof(__n) <= sizeof(unsigned long long)) - return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast(__n)); - - _Number __log2 = 0; - while (__n > 1) { - __log2++; - __n >>= 1; - } - return __log2; -} - template void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp); @@ -880,7 +861,7 @@ template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) { typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - difference_type __depth_limit = 2 * std::__log2i(__last - __first); + difference_type __depth_limit = 2 * std::__bit_log2(std::__to_unsigned_like(__last - __first)); // Only use bitset partitioning for arithmetic types. We should also check // that the default comparator is in use so that we are sure that there are no diff --git a/lib/libcxx/include/__algorithm/stable_partition.h b/lib/libcxx/include/__algorithm/stable_partition.h index 2ba7239a3a..b389ae2508 100644 --- a/lib/libcxx/include/__algorithm/stable_partition.h +++ b/lib/libcxx/include/__algorithm/stable_partition.h @@ -16,6 +16,7 @@ #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__memory/construct_at.h> #include <__memory/destruct_n.h> #include <__memory/unique_ptr.h> #include <__memory/unique_temporary_buffer.h> @@ -33,7 +34,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_HIDE_FROM_ABI _ForwardIterator __stable_partition_impl( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 _ForwardIterator __stable_partition_impl( _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, @@ -61,7 +62,7 @@ _LIBCPP_HIDE_FROM_ABI _ForwardIterator __stable_partition_impl( // Move the falses into the temporary buffer, and the trues to the front of the line // Update __first to always point to the end of the trues value_type* __t = __p.first; - ::new ((void*)__t) value_type(_Ops::__iter_move(__first)); + std::__construct_at(__t, _Ops::__iter_move(__first)); __d.template __incr(); ++__t; _ForwardIterator __i = __first; @@ -70,7 +71,7 @@ _LIBCPP_HIDE_FROM_ABI _ForwardIterator __stable_partition_impl( *__first = _Ops::__iter_move(__i); ++__first; } else { - ::new ((void*)__t) value_type(_Ops::__iter_move(__i)); + std::__construct_at(__t, _Ops::__iter_move(__i)); __d.template __incr(); ++__t; } @@ -116,7 +117,7 @@ __second_half_done: } template -_LIBCPP_HIDE_FROM_ABI _ForwardIterator +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 _ForwardIterator __stable_partition_impl(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, forward_iterator_tag) { typedef typename iterator_traits<_ForwardIterator>::difference_type 
difference_type; typedef typename iterator_traits<_ForwardIterator>::value_type value_type; @@ -145,7 +146,7 @@ __stable_partition_impl(_ForwardIterator __first, _ForwardIterator __last, _Pred } template -_BidirectionalIterator __stable_partition_impl( +_LIBCPP_CONSTEXPR_SINCE_CXX26 _BidirectionalIterator __stable_partition_impl( _BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, @@ -179,7 +180,7 @@ _BidirectionalIterator __stable_partition_impl( // Move the falses into the temporary buffer, and the trues to the front of the line // Update __first to always point to the end of the trues value_type* __t = __p.first; - ::new ((void*)__t) value_type(_Ops::__iter_move(__first)); + std::__construct_at(__t, _Ops::__iter_move(__first)); __d.template __incr(); ++__t; _BidirectionalIterator __i = __first; @@ -188,7 +189,7 @@ _BidirectionalIterator __stable_partition_impl( *__first = _Ops::__iter_move(__i); ++__first; } else { - ::new ((void*)__t) value_type(_Ops::__iter_move(__i)); + std::__construct_at(__t, _Ops::__iter_move(__i)); __d.template __incr(); ++__t; } @@ -247,7 +248,7 @@ __second_half_done: } template -_LIBCPP_HIDE_FROM_ABI _BidirectionalIterator __stable_partition_impl( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 _BidirectionalIterator __stable_partition_impl( _BidirectionalIterator __first, _BidirectionalIterator __last, _Predicate __pred, bidirectional_iterator_tag) { typedef typename iterator_traits<_BidirectionalIterator>::difference_type difference_type; typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; @@ -283,14 +284,14 @@ _LIBCPP_HIDE_FROM_ABI _BidirectionalIterator __stable_partition_impl( } template -_LIBCPP_HIDE_FROM_ABI _ForwardIterator __stable_partition( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 _ForwardIterator __stable_partition( _ForwardIterator __first, _ForwardIterator __last, _Predicate&& __pred, _IterCategory __iter_category) { return std::__stable_partition_impl<_AlgPolicy, __remove_cvref_t<_Predicate>&>( std::move(__first), std::move(__last), __pred, __iter_category); } template -inline _LIBCPP_HIDE_FROM_ABI _ForwardIterator +_LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX26 _ForwardIterator stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { using _IterCategory = typename iterator_traits<_ForwardIterator>::iterator_category; return std::__stable_partition<_ClassicAlgPolicy, _Predicate&>( diff --git a/lib/libcxx/include/__algorithm/stable_sort.h b/lib/libcxx/include/__algorithm/stable_sort.h index 3cfbcf08d2..1ca66f6a51 100644 --- a/lib/libcxx/include/__algorithm/stable_sort.h +++ b/lib/libcxx/include/__algorithm/stable_sort.h @@ -25,10 +25,9 @@ #include <__memory/unique_temporary_buffer.h> #include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> -#include <__type_traits/is_integral.h> +#include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_same.h> #include <__type_traits/is_trivially_assignable.h> -#include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -201,7 +200,7 @@ struct __stable_sort_switch { #if _LIBCPP_STD_VER >= 17 template _LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() { - static_assert(is_integral<_Tp>::value); + static_assert(__is_ordered_integer_representable_v<_Tp>); if constexpr (sizeof(_Tp) == 1) { return 1 << 8; } @@ -211,7 +210,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() { template 
_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() { - static_assert(is_integral<_Tp>::value); + static_assert(__is_ordered_integer_representable_v<_Tp>); if constexpr (sizeof(_Tp) >= 8) { return 1 << 15; } @@ -245,14 +244,19 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort( } #if _LIBCPP_STD_VER >= 17 - constexpr auto __default_comp = - __desugars_to_v<__totally_ordered_less_tag, __remove_cvref_t<_Compare>, value_type, value_type >; - constexpr auto __integral_value = - is_integral_v && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>; - constexpr auto __allowed_radix_sort = __default_comp && __integral_value; - if constexpr (__allowed_radix_sort) { - if (__len <= __buff_size && __len >= static_cast(__radix_sort_min_bound()) && - __len <= static_cast(__radix_sort_max_bound())) { + constexpr auto __default_comp = __desugars_to_v<__less_tag, _Compare, value_type, value_type >; + constexpr auto __radix_sortable = + __is_ordered_integer_representable_v && + is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>; + if constexpr (__default_comp && __radix_sortable) { + if (__len <= __buff_size && __len >= static_cast(std::__radix_sort_min_bound()) && + __len <= static_cast(std::__radix_sort_max_bound())) { + if (__libcpp_is_constant_evaluated()) { + for (auto* __p = __buff; __p < __buff + __buff_size; ++__p) { + std::__construct_at(__p); + } + } + std::__radix_sort(__first, __last, __buff); return; } diff --git a/lib/libcxx/include/__algorithm/swap_ranges.h b/lib/libcxx/include/__algorithm/swap_ranges.h index 54b453b723..2731d4feaa 100644 --- a/lib/libcxx/include/__algorithm/swap_ranges.h +++ b/lib/libcxx/include/__algorithm/swap_ranges.h @@ -10,9 +10,12 @@ #define _LIBCPP___ALGORITHM_SWAP_RANGES_H #include <__algorithm/iterator_operations.h> +#include <__algorithm/min.h> #include <__config> +#include <__fwd/bit_reference.h> #include <__utility/move.h> #include <__utility/pair.h> +#include <__utility/swap.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -23,6 +26,165 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_aligned( + __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) { + using _I1 = __bit_iterator<_Cl, false>; + using difference_type = typename _I1::difference_type; + using __storage_type = typename _I1::__storage_type; + + const int __bits_per_word = _I1::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1; + *__first.__seg_ |= __b2; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_) + swap(*__first.__seg_, *__result.__seg_); + // do last word + if (__n > 0) { + __storage_type __m = ~__storage_type(0) 
>> (__bits_per_word - __n); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1; + *__first.__seg_ |= __b2; + __result.__ctz_ = static_cast(__n); + } + } + return __result; +} + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cr, false> __swap_ranges_unaligned( + __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) { + using _I1 = __bit_iterator<_Cl, false>; + using difference_type = typename _I1::difference_type; + using __storage_type = typename _I1::__storage_type; + + const int __bits_per_word = _I1::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) { + // do first word + if (__first.__ctz_ != 0) { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = std::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) { + unsigned __s = __result.__ctz_ - __first.__ctz_; + *__result.__seg_ |= __b1 << __s; + *__first.__seg_ |= __b2 >> __s; + } else { + unsigned __s = __first.__ctz_ - __result.__ctz_; + *__result.__seg_ |= __b1 >> __s; + *__first.__seg_ |= __b2 << __s; + } + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + unsigned __s = __first.__ctz_ + __ddn; + *__result.__seg_ |= __b1 >> __s; + *__first.__seg_ |= __b2 << __s; + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __m = ~__storage_type(0) << __result.__ctz_; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { + __storage_type __b1 = *__first.__seg_; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 << __result.__ctz_; + *__first.__seg_ = __b2 >> __result.__ctz_; + ++__result.__seg_; + __b2 = *__result.__seg_ & ~__m; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b1 >> __clz_r; + *__first.__seg_ |= __b2 << __clz_r; + } + // do last word + if (__n > 0) { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __dn = std::min<__storage_type>(__n, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 << __result.__ctz_; + *__first.__seg_ |= __b2 >> __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) { + __m = ~__storage_type(0) >> 
(__bits_per_word - __n); + __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 >> __dn; + *__first.__seg_ |= __b2 << __dn; + __result.__ctz_ = static_cast(__n); + } + } + } + return __result; +} + +// 2+1 iterators: size2 >= size1; used by std::swap_ranges. +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> > +__swap_ranges(__bit_iterator<_Cl, false> __first1, + __bit_iterator<_Cl, false> __last1, + __bit_iterator<_Cr, false> __first2) { + if (__first1.__ctz_ == __first2.__ctz_) + return std::make_pair(__last1, std::__swap_ranges_aligned(__first1, __last1, __first2)); + return std::make_pair(__last1, std::__swap_ranges_unaligned(__first1, __last1, __first2)); +} + +// 2+2 iterators: used by std::ranges::swap_ranges. +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> > +__swap_ranges(__bit_iterator<_Cl, false> __first1, + __bit_iterator<_Cl, false> __last1, + __bit_iterator<_Cr, false> __first2, + __bit_iterator<_Cr, false> __last2) { + if (__last1 - __first1 < __last2 - __first2) + return std::make_pair(__last1, std::__swap_ranges<_AlgPolicy>(__first1, __last1, __first2).second); + return std::make_pair(std::__swap_ranges<_AlgPolicy>(__first2, __last2, __first1).second, __last2); +} + // 2+2 iterators: the shorter size will be used. template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator1, _ForwardIterator2> diff --git a/lib/libcxx/include/__assert b/lib/libcxx/include/__assert index 90eaa60235..a9451daf47 100644 --- a/lib/libcxx/include/__assert +++ b/lib/libcxx/include/__assert @@ -20,8 +20,8 @@ #define _LIBCPP_ASSERT(expression, message) \ (__builtin_expect(static_cast(expression), 1) \ ? (void)0 \ - : _LIBCPP_ASSERTION_HANDLER(__FILE__ ":" _LIBCPP_TOSTRING(__LINE__) ": assertion " _LIBCPP_TOSTRING( \ - expression) " failed: " message "\n")) + : _LIBCPP_ASSERTION_HANDLER(__FILE__ ":" _LIBCPP_TOSTRING( \ + __LINE__) ": libc++ Hardening assertion " _LIBCPP_TOSTRING(expression) " failed: " message "\n")) // WARNING: __builtin_assume can currently inhibit optimizations. Only add assumptions with a clear // optimization intent. See https://discourse.llvm.org/t/llvm-assume-blocks-optimization/71609 for a diff --git a/lib/libcxx/include/__assertion_handler b/lib/libcxx/include/__assertion_handler index 1d6b21fc6b..f115658f9f 100644 --- a/lib/libcxx/include/__assertion_handler +++ b/lib/libcxx/include/__assertion_handler @@ -13,9 +13,11 @@ #if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) # include <__cxx03/__config> # include <__cxx03/__verbose_abort> +# include <__cxx03/__verbose_trap> #else # include <__config> # include <__verbose_abort> +# include <__verbose_trap> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -28,18 +30,7 @@ #else -# if __has_builtin(__builtin_verbose_trap) -// AppleClang shipped a slightly different version of __builtin_verbose_trap from the upstream -// version before upstream Clang actually got the builtin. -// TODO: Remove once AppleClang supports the two-arguments version of the builtin. 
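// With the wording change above, a failed hardening assertion reports a
// message of roughly this shape (a sketch; MY_ASSERT and my_handler are
// hypothetical stand-ins for _LIBCPP_ASSERT and _LIBCPP_ASSERTION_HANDLER):
#include <cstdio>
#include <cstdlib>

[[noreturn]] inline void my_handler(const char* msg) {
  std::fputs(msg, stderr);
  std::abort();
}

#define MY_TOSTRING_IMPL(x) #x
#define MY_TOSTRING(x) MY_TOSTRING_IMPL(x)
#define MY_ASSERT(expression, message)                                      \
  (__builtin_expect(static_cast<bool>(expression), 1)                       \
       ? (void)0                                                            \
       : my_handler(__FILE__ ":" MY_TOSTRING(__LINE__)                      \
                    ": libc++ Hardening assertion " MY_TOSTRING(expression) \
                    " failed: " message "\n"))

// MY_ASSERT(2 + 2 == 5, "arithmetic is broken") prints something like:
//   example.cpp:23: libc++ Hardening assertion 2 + 2 == 5 failed: arithmetic is broken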
-# if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1700 -# define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap(message) -# else -# define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap("libc++", message) -# endif -# else -# define _LIBCPP_ASSERTION_HANDLER(message) ((void)message, __builtin_trap()) -# endif +# define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_TRAP(message) #endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG diff --git a/lib/libcxx/include/__atomic/atomic.h b/lib/libcxx/include/__atomic/atomic.h index 975a479e20..eead49dde6 100644 --- a/lib/libcxx/include/__atomic/atomic.h +++ b/lib/libcxx/include/__atomic/atomic.h @@ -23,6 +23,7 @@ #include <__type_traits/is_integral.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_same.h> +#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_pointer.h> #include <__type_traits/remove_volatile.h> @@ -40,6 +41,8 @@ struct __atomic_base // false { mutable __cxx_atomic_impl<_Tp> __a_; + using value_type = _Tp; + #if _LIBCPP_STD_VER >= 17 static constexpr bool is_always_lock_free = __libcpp_is_always_lock_free<__cxx_atomic_impl<_Tp> >::__value; #endif @@ -145,6 +148,8 @@ template struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> { using __base _LIBCPP_NODEBUG = __atomic_base<_Tp, false>; + using difference_type = typename __base::value_type; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __atomic_base() _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __atomic_base(_Tp __d) _NOEXCEPT : __base(__d) {} @@ -226,11 +231,15 @@ struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > { } }; +template +struct __check_atomic_mandates { + using type _LIBCPP_NODEBUG = _Tp; + static_assert(is_trivially_copyable<_Tp>::value, "std::atomic requires that 'T' be a trivially copyable type"); +}; + template -struct atomic : public __atomic_base<_Tp> { +struct atomic : public __atomic_base::type> { using __base _LIBCPP_NODEBUG = __atomic_base<_Tp>; - using value_type = _Tp; - using difference_type = value_type; #if _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI atomic() = default; @@ -258,8 +267,8 @@ struct atomic : public __atomic_base<_Tp> { template struct atomic<_Tp*> : public __atomic_base<_Tp*> { using __base _LIBCPP_NODEBUG = __atomic_base<_Tp*>; - using value_type = _Tp*; - using difference_type = ptrdiff_t; + + using difference_type = ptrdiff_t; _LIBCPP_HIDE_FROM_ABI atomic() _NOEXCEPT = default; @@ -361,7 +370,7 @@ private: // https://github.com/llvm/llvm-project/issues/47978 // clang bug: __old is not updated on failure for atomic::compare_exchange_weak // Note __old = __self.load(memory_order_relaxed) will not work - std::__cxx_atomic_load_inplace(std::addressof(__self.__a_), &__old, memory_order_relaxed); + std::__cxx_atomic_load_inplace(std::addressof(__self.__a_), std::addressof(__old), memory_order_relaxed); } # endif __new = __operation(__old, __operand); diff --git a/lib/libcxx/include/__atomic/atomic_ref.h b/lib/libcxx/include/__atomic/atomic_ref.h index 177ea646b6..b5493662c5 100644 --- a/lib/libcxx/include/__atomic/atomic_ref.h +++ b/lib/libcxx/include/__atomic/atomic_ref.h @@ -119,7 +119,7 @@ public: // that the pointer is going to be aligned properly at runtime because that is a (checked) precondition // of atomic_ref's constructor. 
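// The __check_atomic_mandates indirection above surfaces the trivially-copyable
// mandate at the point std::atomic<T> is instantiated rather than deep inside
// the base classes. A quick illustration (hypothetical user code):
#include <atomic>
#include <string>

std::atomic<int> counter{0};      // int is trivially copyable: OK
// std::atomic<std::string> name; // ill-formed: hits the static_assert
//                                // "std::atomic requires that 'T' be a trivially copyable type"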
static constexpr bool is_always_lock_free = - __atomic_always_lock_free(sizeof(_Tp), &__get_aligner_instance::__instance); + __atomic_always_lock_free(sizeof(_Tp), std::addressof(__get_aligner_instance::__instance)); _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const noexcept { return __atomic_is_lock_free(sizeof(_Tp), __ptr_); } diff --git a/lib/libcxx/include/__atomic/memory_order.h b/lib/libcxx/include/__atomic/memory_order.h index 44790fe888..355804312b 100644 --- a/lib/libcxx/include/__atomic/memory_order.h +++ b/lib/libcxx/include/__atomic/memory_order.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // to pin the underlying type in C++20. enum __legacy_memory_order { __mo_relaxed, __mo_consume, __mo_acquire, __mo_release, __mo_acq_rel, __mo_seq_cst }; -using __memory_order_underlying_t _LIBCPP_NODEBUG = underlying_type<__legacy_memory_order>::type; +using __memory_order_underlying_t _LIBCPP_NODEBUG = __underlying_type_t<__legacy_memory_order>; #if _LIBCPP_STD_VER >= 20 @@ -37,7 +37,7 @@ enum class memory_order : __memory_order_underlying_t { seq_cst = __mo_seq_cst }; -static_assert(is_same::type, __memory_order_underlying_t>::value, +static_assert(is_same<__underlying_type_t, __memory_order_underlying_t>::value, "unexpected underlying type for std::memory_order"); inline constexpr auto memory_order_relaxed = memory_order::relaxed; diff --git a/lib/libcxx/include/__atomic/support.h b/lib/libcxx/include/__atomic/support.h index 4b555ab483..99d0f6aa54 100644 --- a/lib/libcxx/include/__atomic/support.h +++ b/lib/libcxx/include/__atomic/support.h @@ -10,7 +10,6 @@ #define _LIBCPP___ATOMIC_SUPPORT_H #include <__config> -#include <__type_traits/is_trivially_copyable.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -113,8 +112,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > struct __cxx_atomic_impl : public _Base { - static_assert(is_trivially_copyable<_Tp>::value, "std::atomic requires that 'T' be a trivially copyable type"); - _LIBCPP_HIDE_FROM_ABI __cxx_atomic_impl() _NOEXCEPT = default; _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __cxx_atomic_impl(_Tp __value) _NOEXCEPT : _Base(__value) {} }; diff --git a/lib/libcxx/include/__atomic/support/c11.h b/lib/libcxx/include/__atomic/support/c11.h index 177a075be4..1ad299882a 100644 --- a/lib/libcxx/include/__atomic/support/c11.h +++ b/lib/libcxx/include/__atomic/support/c11.h @@ -35,7 +35,7 @@ struct __cxx_atomic_base_impl { } #endif // _LIBCPP_CXX03_LANG _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp __value) _NOEXCEPT : __a_value(__value) {} - _LIBCPP_DISABLE_EXTENSION_WARNING _Atomic(_Tp) __a_value; + _Atomic(_Tp) __a_value; }; #define __cxx_atomic_is_lock_free(__s) __c11_atomic_is_lock_free(__s) diff --git a/lib/libcxx/include/__bit/bit_ceil.h b/lib/libcxx/include/__bit/bit_ceil.h index cfd792dc2e..99881a8538 100644 --- a/lib/libcxx/include/__bit/bit_ceil.h +++ b/lib/libcxx/include/__bit/bit_ceil.h @@ -11,8 +11,8 @@ #include <__assert> #include <__bit/countl.h> -#include <__concepts/arithmetic.h> #include <__config> +#include <__type_traits/integer_traits.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -41,7 +41,7 @@ template # if _LIBCPP_STD_VER >= 20 -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp bit_ceil(_Tp __t) noexcept { return std::__bit_ceil(__t); } diff --git a/lib/libcxx/include/__bit/bit_floor.h b/lib/libcxx/include/__bit/bit_floor.h index 133e369504..799a064130 100644 --- 
a/lib/libcxx/include/__bit/bit_floor.h +++ b/lib/libcxx/include/__bit/bit_floor.h @@ -10,9 +10,8 @@ #define _LIBCPP___BIT_BIT_FLOOR_H #include <__bit/bit_log2.h> -#include <__concepts/arithmetic.h> #include <__config> -#include +#include <__type_traits/integer_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -22,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp bit_floor(_Tp __t) noexcept { return __t == 0 ? 0 : _Tp{1} << std::__bit_log2(__t); } diff --git a/lib/libcxx/include/__bit/bit_log2.h b/lib/libcxx/include/__bit/bit_log2.h index 94ee6c3b2b..8077cd91d6 100644 --- a/lib/libcxx/include/__bit/bit_log2.h +++ b/lib/libcxx/include/__bit/bit_log2.h @@ -11,7 +11,7 @@ #include <__bit/countl.h> #include <__config> -#include <__type_traits/is_unsigned_integer.h> +#include <__type_traits/integer_traits.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -20,16 +20,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 14 - template -_LIBCPP_HIDE_FROM_ABI constexpr _Tp __bit_log2(_Tp __t) noexcept { - static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__bit_log2 requires an unsigned integer type"); +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __bit_log2(_Tp __t) _NOEXCEPT { + static_assert(__is_unsigned_integer_v<_Tp>, "__bit_log2 requires an unsigned integer type"); return numeric_limits<_Tp>::digits - 1 - std::__countl_zero(__t); } -#endif // _LIBCPP_STD_VER >= 14 - _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___BIT_BIT_LOG2_H diff --git a/lib/libcxx/include/__bit/bit_width.h b/lib/libcxx/include/__bit/bit_width.h index 853e481776..75050acabb 100644 --- a/lib/libcxx/include/__bit/bit_width.h +++ b/lib/libcxx/include/__bit/bit_width.h @@ -10,8 +10,8 @@ #define _LIBCPP___BIT_BIT_WIDTH_H #include <__bit/bit_log2.h> -#include <__concepts/arithmetic.h> #include <__config> +#include <__type_traits/integer_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int bit_width(_Tp __t) noexcept { return __t == 0 ? 0 : std::__bit_log2(__t) + 1; } diff --git a/lib/libcxx/include/__bit/countl.h b/lib/libcxx/include/__bit/countl.h index d4df1d049b..0759140208 100644 --- a/lib/libcxx/include/__bit/countl.h +++ b/lib/libcxx/include/__bit/countl.h @@ -6,16 +6,11 @@ // //===----------------------------------------------------------------------===// -// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can -// refactor this code to exclusively use __builtin_clzg. 
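// Concrete values for the relations used above, via the public C++20 <bit>
// functions that these helpers back (illustrative):
#include <bit>
#include <limits>

static_assert(std::bit_floor(10u) == 8u);  // 1 << __bit_log2(10), with __bit_log2(10) == 3
static_assert(std::bit_width(10u) == 4);   // __bit_log2(10) + 1
static_assert(std::bit_ceil(10u) == 16u);
static_assert(std::countl_zero(10u) == std::numeric_limits<unsigned>::digits - 4);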
- #ifndef _LIBCPP___BIT_COUNTL_H #define _LIBCPP___BIT_COUNTL_H -#include <__bit/rotate.h> -#include <__concepts/arithmetic.h> #include <__config> -#include <__type_traits/is_unsigned_integer.h> +#include <__type_traits/integer_traits.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -27,79 +22,20 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { - return __builtin_clz(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { - return __builtin_clzl(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { - return __builtin_clzll(__x); -} - -#if _LIBCPP_HAS_INT128 -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT { -# if __has_builtin(__builtin_clzg) - return __builtin_clzg(__x); -# else - // The function is written in this form due to C++ constexpr limitations. - // The algorithm: - // - Test whether any bit in the high 64-bits is set - // - No bits set: - // - The high 64-bits contain 64 leading zeros, - // - Add the result of the low 64-bits. - // - Any bits set: - // - The number of leading zeros of the input is the number of leading - // zeros in the high 64-bits. - return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast(__x))) - : __builtin_clzll(static_cast(__x >> 64)); -# endif -} -#endif // _LIBCPP_HAS_INT128 - template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { - static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type"); -#if __has_builtin(__builtin_clzg) + static_assert(__is_unsigned_integer_v<_Tp>, "__countl_zero requires an unsigned integer type"); return __builtin_clzg(__t, numeric_limits<_Tp>::digits); -#else // __has_builtin(__builtin_clzg) - if (__t == 0) - return numeric_limits<_Tp>::digits; - - if (sizeof(_Tp) <= sizeof(unsigned int)) - return std::__libcpp_clz(static_cast(__t)) - - (numeric_limits::digits - numeric_limits<_Tp>::digits); - else if (sizeof(_Tp) <= sizeof(unsigned long)) - return std::__libcpp_clz(static_cast(__t)) - - (numeric_limits::digits - numeric_limits<_Tp>::digits); - else if (sizeof(_Tp) <= sizeof(unsigned long long)) - return std::__libcpp_clz(static_cast(__t)) - - (numeric_limits::digits - numeric_limits<_Tp>::digits); - else { - int __ret = 0; - int __iter = 0; - const unsigned int __ulldigits = numeric_limits::digits; - while (true) { - __t = std::__rotl(__t, __ulldigits); - if ((__iter = std::__countl_zero(static_cast(__t))) != __ulldigits) - break; - __ret += __iter; - } - return __ret + __iter; - } -#endif // __has_builtin(__builtin_clzg) } #if _LIBCPP_STD_VER >= 20 -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countl_zero(_Tp __t) noexcept { return std::__countl_zero(__t); } -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countl_one(_Tp __t) noexcept { return __t != numeric_limits<_Tp>::max() ? 
std::countl_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits; } diff --git a/lib/libcxx/include/__bit/countr.h b/lib/libcxx/include/__bit/countr.h index 2f7571133b..f6c98695d3 100644 --- a/lib/libcxx/include/__bit/countr.h +++ b/lib/libcxx/include/__bit/countr.h @@ -6,15 +6,11 @@ // //===----------------------------------------------------------------------===// -// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can -// refactor this code to exclusively use __builtin_ctzg. - #ifndef _LIBCPP___BIT_COUNTR_H #define _LIBCPP___BIT_COUNTR_H -#include <__bit/rotate.h> -#include <__concepts/arithmetic.h> #include <__config> +#include <__type_traits/integer_traits.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -26,51 +22,20 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { - return __builtin_ctz(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { - return __builtin_ctzl(__x); -} - -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { - return __builtin_ctzll(__x); -} - template -[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { -#if __has_builtin(__builtin_ctzg) +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT { + static_assert(__is_unsigned_integer_v<_Tp>, "__countr_zero only works with unsigned types"); return __builtin_ctzg(__t, numeric_limits<_Tp>::digits); -#else // __has_builtin(__builtin_ctzg) - if (__t == 0) - return numeric_limits<_Tp>::digits; - if (sizeof(_Tp) <= sizeof(unsigned int)) - return std::__libcpp_ctz(static_cast(__t)); - else if (sizeof(_Tp) <= sizeof(unsigned long)) - return std::__libcpp_ctz(static_cast(__t)); - else if (sizeof(_Tp) <= sizeof(unsigned long long)) - return std::__libcpp_ctz(static_cast(__t)); - else { - int __ret = 0; - const unsigned int __ulldigits = numeric_limits::digits; - while (static_cast(__t) == 0uLL) { - __ret += __ulldigits; - __t >>= __ulldigits; - } - return __ret + std::__libcpp_ctz(static_cast(__t)); - } -#endif // __has_builtin(__builtin_ctzg) } #if _LIBCPP_STD_VER >= 20 -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept { return std::__countr_zero(__t); } -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) noexcept { return __t != numeric_limits<_Tp>::max() ? 
std::countr_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits; } diff --git a/lib/libcxx/include/__bit/has_single_bit.h b/lib/libcxx/include/__bit/has_single_bit.h index 52f5853a1b..b43e69323e 100644 --- a/lib/libcxx/include/__bit/has_single_bit.h +++ b/lib/libcxx/include/__bit/has_single_bit.h @@ -9,8 +9,8 @@ #ifndef _LIBCPP___BIT_HAS_SINGLE_BIT_H #define _LIBCPP___BIT_HAS_SINGLE_BIT_H -#include <__concepts/arithmetic.h> #include <__config> +#include <__type_traits/integer_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -23,7 +23,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_single_bit(_Tp __t) noexcept { return __t != 0 && (((__t & (__t - 1)) == 0)); } diff --git a/lib/libcxx/include/__bit/popcount.h b/lib/libcxx/include/__bit/popcount.h index 5cf0a01d07..8d9ba09938 100644 --- a/lib/libcxx/include/__bit/popcount.h +++ b/lib/libcxx/include/__bit/popcount.h @@ -6,16 +6,11 @@ // //===----------------------------------------------------------------------===// -// TODO: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can -// refactor this code to exclusively use __builtin_popcountg. - #ifndef _LIBCPP___BIT_POPCOUNT_H #define _LIBCPP___BIT_POPCOUNT_H -#include <__bit/rotate.h> -#include <__concepts/arithmetic.h> #include <__config> -#include +#include <__type_traits/integer_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -26,43 +21,20 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT { - return __builtin_popcount(__x); -} - -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT { - return __builtin_popcountl(__x); -} - -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT { - return __builtin_popcountll(__x); +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount(_Tp __t) _NOEXCEPT { + static_assert(__is_unsigned_integer_v<_Tp>, "__popcount only works with unsigned types"); + return __builtin_popcountg(__t); } #if _LIBCPP_STD_VER >= 20 -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept { -# if __has_builtin(__builtin_popcountg) - return __builtin_popcountg(__t); -# else // __has_builtin(__builtin_popcountg) - if (sizeof(_Tp) <= sizeof(unsigned int)) - return std::__libcpp_popcount(static_cast(__t)); - else if (sizeof(_Tp) <= sizeof(unsigned long)) - return std::__libcpp_popcount(static_cast(__t)); - else if (sizeof(_Tp) <= sizeof(unsigned long long)) - return std::__libcpp_popcount(static_cast(__t)); - else { - int __ret = 0; - while (__t != 0) { - __ret += std::__libcpp_popcount(static_cast(__t)); - __t >>= numeric_limits::digits; - } - return __ret; - } -# endif // __has_builtin(__builtin_popcountg) + return std::__popcount(__t); } -#endif // _LIBCPP_STD_VER >= 20 +#endif _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__bit/rotate.h b/lib/libcxx/include/__bit/rotate.h index d79d98de29..c6f34bdaf6 100644 --- a/lib/libcxx/include/__bit/rotate.h +++ b/lib/libcxx/include/__bit/rotate.h @@ -9,9 +9,8 @@ #ifndef _LIBCPP___BIT_ROTATE_H #define _LIBCPP___BIT_ROTATE_H -#include 
<__concepts/arithmetic.h> #include <__config> -#include <__type_traits/is_unsigned_integer.h> +#include <__type_traits/integer_traits.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -25,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // the rotr function becomes the ROR instruction. template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT { - static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotl requires an unsigned integer type"); + static_assert(__is_unsigned_integer_v<_Tp>, "__rotl requires an unsigned integer type"); const int __n = numeric_limits<_Tp>::digits; int __r = __s % __n; @@ -40,7 +39,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT { - static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type"); + static_assert(__is_unsigned_integer_v<_Tp>, "__rotr requires an unsigned integer type"); const int __n = numeric_limits<_Tp>::digits; int __r = __s % __n; @@ -55,12 +54,12 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) #if _LIBCPP_STD_VER >= 20 -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp rotl(_Tp __t, int __cnt) noexcept { return std::__rotl(__t, __cnt); } -template <__libcpp_unsigned_integer _Tp> +template <__unsigned_integer _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Tp rotr(_Tp __t, int __cnt) noexcept { return std::__rotr(__t, __cnt); } diff --git a/lib/libcxx/include/__bit_reference b/lib/libcxx/include/__bit_reference index 67abb02312..a3e6defd40 100644 --- a/lib/libcxx/include/__bit_reference +++ b/lib/libcxx/include/__bit_reference @@ -10,21 +10,35 @@ #ifndef _LIBCPP___BIT_REFERENCE #define _LIBCPP___BIT_REFERENCE +#include <__algorithm/comp.h> +#include <__algorithm/copy.h> +#include <__algorithm/copy_backward.h> #include <__algorithm/copy_n.h> +#include <__algorithm/equal.h> #include <__algorithm/min.h> +#include <__algorithm/rotate.h> +#include <__algorithm/swap_ranges.h> +#include <__assert> #include <__bit/countr.h> #include <__compare/ordering.h> #include <__config> #include <__cstddef/ptrdiff_t.h> #include <__cstddef/size_t.h> +#include <__functional/identity.h> #include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/desugars_to.h> +#include <__type_traits/enable_if.h> #include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/is_same.h> +#include <__type_traits/is_unsigned.h> #include <__type_traits/void_t.h> +#include <__utility/pair.h> #include <__utility/swap.h> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -55,6 +69,53 @@ struct __size_difference_type_traits<_Cp, __void_t +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __trailing_mask(unsigned __clz) { + static_assert(is_unsigned<_StorageType>::value, "__trailing_mask only works with unsigned types"); + return static_cast<_StorageType>(~static_cast<_StorageType>(0)) >> __clz; +} + +// Creates a mask of type `_StorageType` with a specified number of trailing zeros (__ctz) and sets all remaining +// bits to one. 
+template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __leading_mask(unsigned __ctz) { + static_assert(is_unsigned<_StorageType>::value, "__leading_mask only works with unsigned types"); + return static_cast<_StorageType>(~static_cast<_StorageType>(0)) << __ctz; +} + +// Creates a mask of type `_StorageType` with a specified number of leading zeros (__clz), a specified number of +// trailing zeros (__ctz), and sets all bits in between to one. +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __middle_mask(unsigned __clz, unsigned __ctz) { + static_assert(is_unsigned<_StorageType>::value, "__middle_mask only works with unsigned types"); + return std::__leading_mask<_StorageType>(__ctz) & std::__trailing_mask<_StorageType>(__clz); +} + +// This function is designed to operate correctly even for smaller integral types like `uint8_t`, `uint16_t`, +// or `unsigned short`. +// See https://github.com/llvm/llvm-project/pull/122410. +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void +__fill_masked_range(_StoragePointer __word, unsigned __clz, unsigned __ctz, bool __fill_val) { + static_assert(is_unsigned::element_type>::value, + "__fill_masked_range must be called with unsigned type"); + using _StorageType = typename pointer_traits<_StoragePointer>::element_type; + _LIBCPP_ASSERT_VALID_INPUT_RANGE( + __ctz + __clz < sizeof(_StorageType) * CHAR_BIT, "__fill_masked_range called with invalid range"); + _StorageType __m = std::__middle_mask<_StorageType>(__clz, __ctz); + if (__fill_val) + *__word |= __m; + else + *__word &= ~__m; +} + template ::value> class __bit_reference { using __storage_type _LIBCPP_NODEBUG = typename _Cp::__storage_type; @@ -104,7 +165,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT { - return __bit_iterator<_Cp, false>(__seg_, static_cast(std::__libcpp_ctz(__mask_))); + return __bit_iterator<_Cp, false>(__seg_, static_cast(std::__countr_zero(__mask_))); } private: @@ -173,7 +234,7 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT { - return __bit_iterator<_Cp, true>(__seg_, static_cast(std::__libcpp_ctz(__mask_))); + return __bit_iterator<_Cp, true>(__seg_, static_cast(std::__countr_zero(__mask_))); } private: @@ -183,422 +244,6 @@ private: __mask_(__m) {} }; -// copy - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using __storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - 
++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __nw = __n / __bits_per_word; - std::copy_n(std::__to_address(__first.__seg_), __nw, std::__to_address(__result.__seg_)); - __n -= __nw * __bits_per_word; - __result.__seg_ += __nw; - // do last word - if (__n > 0) { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(__n); - } - } - return __result; -} - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using __storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) - *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); - else - *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = ~__storage_type(0) << __result.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { - __storage_type __b = *__first.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - ++__result.__seg_; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b >> __clz_r; - } - // do last word - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - __storage_type __dn = std::min(__n, static_cast(__clz_r)); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __dn; - __result.__ctz_ = static_cast(__n); - } - } - } - return __result; -} - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> -copy(__bit_iterator<_Cp, 
_IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - if (__first.__ctz_ == __result.__ctz_) - return std::__copy_aligned(__first, __last, __result); - return std::__copy_unaligned(__first, __last, __result); -} - -// copy_backward - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_aligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using __storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__last.__ctz_ != 0) { - difference_type __dn = std::min(static_cast(__last.__ctz_), __n); - __n -= __dn; - unsigned __clz = __bits_per_word - __last.__ctz_; - __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz); - __storage_type __b = *__last.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); - // __last.__ctz_ = 0 - } - // __last.__ctz_ == 0 || __n == 0 - // __result.__ctz_ == 0 || __n == 0 - // do middle words - __storage_type __nw = __n / __bits_per_word; - __result.__seg_ -= __nw; - __last.__seg_ -= __nw; - std::copy_n(std::__to_address(__last.__seg_), __nw, std::__to_address(__result.__seg_)); - __n -= __nw * __bits_per_word; - // do last word - if (__n > 0) { - __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n); - __storage_type __b = *--__last.__seg_ & __m; - *--__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); - } - } - return __result; -} - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_backward_unaligned( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - using _In = __bit_iterator<_Cp, _IsConst>; - using difference_type = typename _In::difference_type; - using __storage_type = typename _In::__storage_type; - - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__last.__ctz_ != 0) { - difference_type __dn = std::min(static_cast(__last.__ctz_), __n); - __n -= __dn; - unsigned __clz_l = __bits_per_word - __last.__ctz_; - __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l); - __storage_type __b = *__last.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = std::min(__dn, static_cast(__result.__ctz_)); - if (__ddn > 0) { - __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __last.__ctz_) - *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); - else - *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_); - __result.__ctz_ = static_cast(((-__ddn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - } - if (__dn > 0) { - // __result.__ctz_ == 0 - --__result.__seg_; - __result.__ctz_ = static_cast(-__dn & (__bits_per_word - 1)); - __m = ~__storage_type(0) << __result.__ctz_; - *__result.__seg_ &= ~__m; - __last.__ctz_ -= __dn + 
__ddn; - *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); - } - // __last.__ctz_ = 0 - } - // __last.__ctz_ == 0 || __n == 0 - // __result.__ctz_ != 0 || __n == 0 - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = ~__storage_type(0) >> __clz_r; - for (; __n >= __bits_per_word; __n -= __bits_per_word) { - __storage_type __b = *--__last.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __clz_r; - *--__result.__seg_ &= __m; - *__result.__seg_ |= __b << __result.__ctz_; - } - // do last word - if (__n > 0) { - __m = ~__storage_type(0) << (__bits_per_word - __n); - __storage_type __b = *--__last.__seg_ & __m; - __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __dn = std::min(__n, static_cast(__result.__ctz_)); - __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_); - __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) { - // __result.__ctz_ == 0 - --__result.__seg_; - __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); - __m = ~__storage_type(0) << __result.__ctz_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << (__result.__ctz_ - (__bits_per_word - __n - __dn)); - } - } - } - return __result; -} - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> copy_backward( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - if (__last.__ctz_ == __result.__ctz_) - return std::__copy_backward_aligned(__first, __last, __result); - return std::__copy_backward_unaligned(__first, __last, __result); -} - -// move - -template -inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> -move(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - return std::copy(__first, __last, __result); -} - -// move_backward - -template -inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> move_backward( - __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - return std::copy_backward(__first, __last, __result); -} - -// swap_ranges - -template -_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_aligned( - __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) { - using _I1 = __bit_iterator<_Cl, false>; - using difference_type = typename _I1::difference_type; - using __storage_type = typename _I1::__storage_type; - - const int __bits_per_word = _I1::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1; - *__first.__seg_ |= __b2; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - 
// do middle words - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_) - swap(*__first.__seg_, *__result.__seg_); - // do last word - if (__n > 0) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1; - *__first.__seg_ |= __b2; - __result.__ctz_ = static_cast(__n); - } - } - return __result; -} - -template -_LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> __swap_ranges_unaligned( - __bit_iterator<_Cl, false> __first, __bit_iterator<_Cl, false> __last, __bit_iterator<_Cr, false> __result) { - using _I1 = __bit_iterator<_Cl, false>; - using difference_type = typename _I1::difference_type; - using __storage_type = typename _I1::__storage_type; - - const int __bits_per_word = _I1::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = std::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) { - unsigned __s = __result.__ctz_ - __first.__ctz_; - *__result.__seg_ |= __b1 << __s; - *__first.__seg_ |= __b2 >> __s; - } else { - unsigned __s = __first.__ctz_ - __result.__ctz_; - *__result.__seg_ |= __b1 >> __s; - *__first.__seg_ |= __b2 << __s; - } - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - unsigned __s = __first.__ctz_ + __ddn; - *__result.__seg_ |= __b1 >> __s; - *__first.__seg_ |= __b2 << __s; - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __m = ~__storage_type(0) << __result.__ctz_; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { - __storage_type __b1 = *__first.__seg_; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 << __result.__ctz_; - *__first.__seg_ = __b2 >> __result.__ctz_; - ++__result.__seg_; - __b2 = *__result.__seg_ & ~__m; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b1 >> __clz_r; - *__first.__seg_ |= __b2 << __clz_r; - } - // do last word - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __dn = std::min<__storage_type>(__n, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 << __result.__ctz_; - *__first.__seg_ |= __b2 >> __result.__ctz_; - 
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 >> __dn; - *__first.__seg_ |= __b2 << __dn; - __result.__ctz_ = static_cast(__n); - } - } - } - return __result; -} - -template -inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges( - __bit_iterator<_Cl, false> __first1, __bit_iterator<_Cl, false> __last1, __bit_iterator<_Cr, false> __first2) { - if (__first1.__ctz_ == __first2.__ctz_) - return std::__swap_ranges_aligned(__first1, __last1, __first2); - return std::__swap_ranges_unaligned(__first1, __last1, __first2); -} - -// rotate - template struct __bit_array { using difference_type _LIBCPP_NODEBUG = typename __size_difference_type_traits<_Cp>::difference_type; @@ -630,166 +275,6 @@ struct __bit_array { } }; -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> -rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) { - using _I1 = __bit_iterator<_Cp, false>; - using difference_type = typename _I1::difference_type; - - difference_type __d1 = __middle - __first; - difference_type __d2 = __last - __middle; - _I1 __r = __first + __d2; - while (__d1 != 0 && __d2 != 0) { - if (__d1 <= __d2) { - if (__d1 <= __bit_array<_Cp>::capacity()) { - __bit_array<_Cp> __b(__d1); - std::copy(__first, __middle, __b.begin()); - std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first)); - break; - } else { - __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle); - __first = __middle; - __middle = __mp; - __d2 -= __d1; - } - } else { - if (__d2 <= __bit_array<_Cp>::capacity()) { - __bit_array<_Cp> __b(__d2); - std::copy(__middle, __last, __b.begin()); - std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last)); - break; - } else { - __bit_iterator<_Cp, false> __mp = __first + __d2; - std::swap_ranges(__first, __mp, __middle); - __first = __mp; - __d1 -= __d2; - } - } - } - return __r; -} - -// equal - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_unaligned( - __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - using _It = __bit_iterator<_Cp, _IC1>; - using difference_type = typename _It::difference_type; - using __storage_type = typename _It::__storage_type; - - const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; - if (__n > 0) { - // do first word - if (__first1.__ctz_ != 0) { - unsigned __clz_f = __bits_per_word - __first1.__ctz_; - difference_type __dn = std::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first1.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __ddn = std::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - if (__first2.__ctz_ > __first1.__ctz_) { - if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) - return false; - } else { - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) - return false; - } - __first2.__seg_ += 
(__ddn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast((__ddn + __first2.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) - return false; - __first2.__ctz_ = static_cast(__dn); - } - ++__first1.__seg_; - // __first1.__ctz_ = 0; - } - // __first1.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __m = ~__storage_type(0) << __first2.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) { - __storage_type __b = *__first1.__seg_; - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - ++__first2.__seg_; - if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) - return false; - } - // do last word - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first1.__seg_ & __m; - __storage_type __dn = std::min(__n, static_cast(__clz_r)); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast((__dn + __first2.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (__b >> __dn)) - return false; - } - } - } - return true; -} - -template -_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI bool __equal_aligned( - __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - using _It = __bit_iterator<_Cp, _IC1>; - using difference_type = typename _It::difference_type; - using __storage_type = typename _It::__storage_type; - - const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; - if (__n > 0) { - // do first word - if (__first1.__ctz_ != 0) { - unsigned __clz = __bits_per_word - __first1.__ctz_; - difference_type __dn = std::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - ++__first2.__seg_; - ++__first1.__seg_; - // __first1.__ctz_ = 0; - // __first2.__ctz_ = 0; - } - // __first1.__ctz_ == 0; - // __first2.__ctz_ == 0; - // do middle words - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) - if (*__first2.__seg_ != *__first1.__seg_) - return false; - // do last word - if (__n > 0) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - } - } - return true; -} - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - if (__first1.__ctz_ == __first2.__ctz_) - return std::__equal_aligned(__first1, __last1, __first2); - return std::__equal_unaligned(__first1, __last1, __first2); -} - template class __bit_iterator { public: @@ -844,6 +329,7 @@ public: } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator*() const _NOEXCEPT { + _LIBCPP_ASSERT_INTERNAL(__ctz_ < __bits_per_word, "Dereferencing an invalid __bit_iterator."); return __conditional_t<_IsConst, 
__bit_const_reference<_Cp>, __bit_reference<_Cp> >( __seg_, __storage_type(1) << __ctz_); } @@ -968,7 +454,10 @@ private: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit __bit_iterator(__storage_pointer __s, unsigned __ctz) _NOEXCEPT : __seg_(__s), - __ctz_(__ctz) {} + __ctz_(__ctz) { + _LIBCPP_ASSERT_INTERNAL( + __ctz_ < __bits_per_word, "__bit_iterator initialized with an invalid number of trailing zeros."); + } friend typename _Cp::__self; @@ -989,38 +478,59 @@ private: _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_unaligned( __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> - copy(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> > + __copy_impl::operator()( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result) const; template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned( __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_unaligned( __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); - template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> - copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template + friend struct __copy_backward_impl; template - friend __bit_iterator<_Cr, false> + _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false> __swap_ranges_aligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>); template - friend __bit_iterator<_Cr, false> + _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Cr, false> __swap_ranges_unaligned(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>); - template - friend __bit_iterator<_Cr, false> - swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>); - template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> - rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>); - template + template + _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> > + __swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>); + template + _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false> > + __rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>); + template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool - __equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); - template + __equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>); + template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool - __equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); - template - _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool - equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + 
__equal_unaligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>); + template , int> > + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_iter_impl( + __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>, _BinaryPredicate); + template && __is_identity<_Proj1>::value && + __is_identity<_Proj2>::value, + int> > + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool __equal_impl( + __bit_iterator<_Dp, _IsConst1> __first1, + __bit_iterator<_Dp, _IsConst1> __last1, + __bit_iterator<_Dp, _IsConst2> __first2, + __bit_iterator<_Dp, _IsConst2>, + _Pred&, + _Proj1&, + _Proj2&); template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC> __find_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type); diff --git a/lib/libcxx/include/__charconv/tables.h b/lib/libcxx/include/__charconv/tables.h index 9568bf841c..b8c6fd8af0 100644 --- a/lib/libcxx/include/__charconv/tables.h +++ b/lib/libcxx/include/__charconv/tables.h @@ -19,16 +19,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 17 - namespace __itoa { -inline constexpr char __base_2_lut[64] = { +inline _LIBCPP_CONSTEXPR const char __base_2_lut[64] = { '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '1', '1', '0', '1', '0', '0', '0', '1', '0', '1', '0', '1', '1', '0', '0', '1', '1', '1', '1', '0', '0', '0', '1', '0', '0', '1', '1', '0', '1', '0', '1', '0', '1', '1', '1', '1', '0', '0', '1', '1', '0', '1', '1', '1', '1', '0', '1', '1', '1', '1'}; -inline constexpr char __base_8_lut[128] = { +inline _LIBCPP_CONSTEXPR const char __base_8_lut[128] = { '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '4', '0', @@ -36,7 +34,7 @@ inline constexpr char __base_8_lut[128] = { '5', '4', '5', '5', '5', '6', '5', '7', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7'}; -inline constexpr char __base_16_lut[512] = { +inline _LIBCPP_CONSTEXPR const char __base_16_lut[512] = { '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '0', 'a', '0', 'b', '0', 'c', '0', 'd', '0', 'e', '0', 'f', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '1', 'a', '1', 'b', '1', 'c', '1', 'd', '1', 'e', '1', 'f', '2', '0', '2', '1', '2', @@ -61,7 +59,7 @@ inline constexpr char __base_16_lut[512] = { '1', 'f', '2', 'f', '3', 'f', '4', 'f', '5', 'f', '6', 'f', '7', 'f', '8', 'f', '9', 'f', 'a', 'f', 'b', 'f', 'c', 'f', 'd', 'f', 'e', 'f', 'f'}; -inline constexpr uint32_t __pow10_32[10] = { +inline _LIBCPP_CONSTEXPR const uint32_t __pow10_32[10] = { UINT32_C(0), UINT32_C(10), UINT32_C(100), @@ -73,7 +71,7 @@ inline constexpr uint32_t __pow10_32[10] = { UINT32_C(100000000), UINT32_C(1000000000)}; -inline constexpr uint64_t __pow10_64[20] = { +inline _LIBCPP_CONSTEXPR const uint64_t __pow10_64[20] = { UINT64_C(0), UINT64_C(10), UINT64_C(100), @@ -96,8 +94,8 @@ inline constexpr uint64_t __pow10_64[20] = { UINT64_C(10000000000000000000)}; # if _LIBCPP_HAS_INT128 -inline constexpr int __pow10_128_offset = 0; -inline constexpr 
__uint128_t __pow10_128[40] = { +inline _LIBCPP_CONSTEXPR const int __pow10_128_offset = 0; +inline _LIBCPP_CONSTEXPR const __uint128_t __pow10_128[40] = { UINT64_C(0), UINT64_C(10), UINT64_C(100), @@ -140,7 +138,7 @@ inline constexpr __uint128_t __pow10_128[40] = { (__uint128_t(UINT64_C(10000000000000000000)) * UINT64_C(10000000000000000000)) * 10}; # endif -inline constexpr char __digits_base_10[200] = { +inline _LIBCPP_CONSTEXPR const char __digits_base_10[200] = { // clang-format off '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', @@ -156,8 +154,6 @@ inline constexpr char __digits_base_10[200] = { } // namespace __itoa -#endif // _LIBCPP_STD_VER >= 17 - _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___CHARCONV_TABLES diff --git a/lib/libcxx/include/__charconv/to_chars_base_10.h b/lib/libcxx/include/__charconv/to_chars_base_10.h index 06e4e69233..d90952ea71 100644 --- a/lib/libcxx/include/__charconv/to_chars_base_10.h +++ b/lib/libcxx/include/__charconv/to_chars_base_10.h @@ -26,55 +26,53 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 17 - namespace __itoa { -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append1(char* __first, uint32_t __value) _NOEXCEPT { *__first = '0' + static_cast(__value); return __first + 1; } -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append2(char* __first, uint32_t __value) _NOEXCEPT { return std::copy_n(&__digits_base_10[__value * 2], 2, __first); } -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append3(char* __first, uint32_t __value) _NOEXCEPT { return __itoa::__append2(__itoa::__append1(__first, __value / 100), __value % 100); } -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append4(char* __first, uint32_t __value) _NOEXCEPT { return __itoa::__append2(__itoa::__append2(__first, __value / 100), __value % 100); } -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append5(char* __first, uint32_t __value) _NOEXCEPT { return __itoa::__append4(__itoa::__append1(__first, __value / 10000), __value % 10000); } -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append6(char* __first, uint32_t __value) _NOEXCEPT { return __itoa::__append4(__itoa::__append2(__first, __value / 10000), __value % 10000); } -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append7(char* __first, uint32_t __value) _NOEXCEPT { return __itoa::__append6(__itoa::__append1(__first, __value / 1000000), __value % 1000000); } -_LIBCPP_CONSTEXPR_SINCE_CXX23 
_LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append8(char* __first, uint32_t __value) _NOEXCEPT { return __itoa::__append6(__itoa::__append2(__first, __value / 1000000), __value % 1000000); } -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* __append9(char* __first, uint32_t __value) _NOEXCEPT { return __itoa::__append8(__itoa::__append1(__first, __value / 100000000), __value % 100000000); } template -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI char* __append10(char* __first, _Tp __value) _NOEXCEPT { return __itoa::__append8(__itoa::__append2(__first, static_cast(__value / 100000000)), static_cast(__value % 100000000)); } _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* -__base_10_u32(char* __first, uint32_t __value) noexcept { +__base_10_u32(char* __first, uint32_t __value) _NOEXCEPT { if (__value < 1000000) { if (__value < 10000) { if (__value < 100) { @@ -110,7 +108,7 @@ __base_10_u32(char* __first, uint32_t __value) noexcept { } _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* -__base_10_u64(char* __buffer, uint64_t __value) noexcept { +__base_10_u64(char* __buffer, uint64_t __value) _NOEXCEPT { if (__value <= UINT32_MAX) return __itoa::__base_10_u32(__buffer, static_cast(__value)); @@ -132,13 +130,13 @@ __base_10_u64(char* __buffer, uint64_t __value) noexcept { /// \note The lookup table contains a partial set of exponents limiting the /// range that can be used. However the range is sufficient for /// \ref __base_10_u128. 
-_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) noexcept { +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline __uint128_t __pow_10(int __exp) _NOEXCEPT { _LIBCPP_ASSERT_INTERNAL(__exp >= __pow10_128_offset, "Index out of bounds"); return __pow10_128[__exp - __pow10_128_offset]; } _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI inline char* -__base_10_u128(char* __buffer, __uint128_t __value) noexcept { +__base_10_u128(char* __buffer, __uint128_t __value) _NOEXCEPT { _LIBCPP_ASSERT_INTERNAL( __value > numeric_limits::max(), "The optimizations for this algorithm fails when this isn't true."); @@ -179,8 +177,6 @@ __base_10_u128(char* __buffer, __uint128_t __value) noexcept { # endif } // namespace __itoa -#endif // _LIBCPP_STD_VER >= 17 - _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__charconv/to_chars_integral.h b/lib/libcxx/include/__charconv/to_chars_integral.h index 710299df9b..f10cc35668 100644 --- a/lib/libcxx/include/__charconv/to_chars_integral.h +++ b/lib/libcxx/include/__charconv/to_chars_integral.h @@ -39,16 +39,12 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 17 - -to_chars_result to_chars(char*, char*, bool, int = 10) = delete; - template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result +inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type); template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result +inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result __to_chars_itoa(char* __first, char* __last, _Tp __value, true_type) { auto __x = std::__to_unsigned_like(__value); if (__value < 0 && __first != __last) { @@ -60,7 +56,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, true_type) { } template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result +inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) { using __tx = __itoa::__traits<_Tp>; auto __diff = __last - __first; @@ -73,7 +69,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) { # if _LIBCPP_HAS_INT128 template <> -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result +inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result __to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) { // When the value fits in 64-bits use the 64-bit code path. This reduces // the number of expensive calculations on 128-bit values. 
@@ -92,20 +88,20 @@ __to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) { } # endif -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result -__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_type); +template ::value, int> = 0> +inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result +__to_chars_integral(char* __first, char* __last, _Tp __value, int __base); -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result -__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, true_type) { +template ::value, int> = 0> +inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result +__to_chars_integral(char* __first, char* __last, _Tp __value, int __base) { auto __x = std::__to_unsigned_like(__value); if (__value < 0 && __first != __last) { *__first++ = '-'; __x = std::__complement(__x); } - return std::__to_chars_integral(__first, __last, __x, __base, false_type()); + return std::__to_chars_integral(__first, __last, __x, __base); } namespace __itoa { @@ -116,15 +112,14 @@ struct _LIBCPP_HIDDEN __integral; template <> struct _LIBCPP_HIDDEN __integral<2> { template - _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept { + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT { // If value == 0 still need one digit. If the value != this has no - // effect since the code scans for the most significant bit set. (Note - // that __libcpp_clz doesn't work for 0.) - return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1); + // effect since the code scans for the most significant bit set. + return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1); } template - _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result + _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result __to_chars(char* __first, char* __last, _Tp __value) { ptrdiff_t __cap = __last - __first; int __n = __width(__value); @@ -152,15 +147,14 @@ struct _LIBCPP_HIDDEN __integral<2> { template <> struct _LIBCPP_HIDDEN __integral<8> { template - _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept { + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT { // If value == 0 still need one digit. If the value != this has no - // effect since the code scans for the most significat bit set. (Note - // that __libcpp_clz doesn't work for 0.) - return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3; + // effect since the code scans for the most significat bit set. + return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3; } template - _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result + _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result __to_chars(char* __first, char* __last, _Tp __value) { ptrdiff_t __cap = __last - __first; int __n = __width(__value); @@ -188,15 +182,14 @@ struct _LIBCPP_HIDDEN __integral<8> { template <> struct _LIBCPP_HIDDEN __integral<16> { template - _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept { + _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR int __width(_Tp __value) _NOEXCEPT { // If value == 0 still need one digit. If the value != this has no - // effect since the code scans for the most significat bit set. (Note - // that __libcpp_clz doesn't work for 0.) 
- return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4; + // effect since the code scans for the most significat bit set. + return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4; } template - _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static to_chars_result + _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI static __to_chars_result __to_chars(char* __first, char* __last, _Tp __value) { ptrdiff_t __cap = __last - __first; int __n = __width(__value); @@ -235,13 +228,13 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_widt } template = sizeof(unsigned)), int> = 0> -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result __to_chars_integral(char* __first, char* __last, _Tp __value) { return __itoa::__integral<_Base>::__to_chars(__first, __last, __value); } template = 0> -_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result +_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result __to_chars_integral(char* __first, char* __last, _Tp __value) { return std::__to_chars_integral<_Base>(__first, __last, static_cast(__value)); } @@ -272,9 +265,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __to_chars_integral_widt __libcpp_unreachable(); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result -__to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_type) { +template ::value, int> > +inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI __to_chars_result +__to_chars_integral(char* __first, char* __last, _Tp __value, int __base) { if (__base == 10) [[likely]] return std::__to_chars_itoa(__first, __last, __value, false_type()); @@ -302,6 +295,28 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base, false_ return {__last, errc(0)}; } +_LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 char __hex_to_upper(char __c) { + switch (__c) { + case 'a': + return 'A'; + case 'b': + return 'B'; + case 'c': + return 'C'; + case 'd': + return 'D'; + case 'e': + return 'E'; + case 'f': + return 'F'; + } + return __c; +} + +#if _LIBCPP_STD_VER >= 17 + +to_chars_result to_chars(char*, char*, bool, int = 10) = delete; + template ::value, int> = 0> inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result to_chars(char* __first, char* __last, _Tp __value) { @@ -316,7 +331,7 @@ to_chars(char* __first, char* __last, _Tp __value, int __base) { _LIBCPP_ASSERT_UNCATEGORIZED(2 <= __base && __base <= 36, "base not in [2, 36]"); using _Type = __make_32_64_or_128_bit_t<_Tp>; - return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), __base, is_signed<_Tp>()); + return std::__to_chars_integral(__first, __last, static_cast<_Type>(__value), __base); } #endif // _LIBCPP_STD_VER >= 17 diff --git a/lib/libcxx/include/__charconv/to_chars_result.h b/lib/libcxx/include/__charconv/to_chars_result.h index 8df0897a49..41dea4ab14 100644 --- a/lib/libcxx/include/__charconv/to_chars_result.h +++ b/lib/libcxx/include/__charconv/to_chars_result.h @@ -34,6 +34,15 @@ struct _LIBCPP_EXPORTED_FROM_ABI to_chars_result { #endif // _LIBCPP_STD_VER >= 17 +struct __to_chars_result { + char* __ptr; + errc __ec; + +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_HIDE_FROM_ABI constexpr operator to_chars_result() { return {__ptr, __ec}; } +#endif +}; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___CHARCONV_TO_CHARS_RESULT_H diff 
--git a/lib/libcxx/include/__charconv/traits.h b/lib/libcxx/include/__charconv/traits.h index 2cb37c8cfb..9fd0092ca7 100644 --- a/lib/libcxx/include/__charconv/traits.h +++ b/lib/libcxx/include/__charconv/traits.h @@ -15,6 +15,7 @@ #include <__charconv/tables.h> #include <__charconv/to_chars_base_10.h> #include <__config> +#include <__memory/addressof.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_unsigned.h> #include @@ -29,27 +30,22 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 17 - namespace __itoa { template struct _LIBCPP_HIDDEN __traits_base; template -struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t> { +struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t > { using type = uint32_t; /// The width estimation using a log10 algorithm. /// /// The algorithm is based on /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 - /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that - /// function requires its input to have at least one bit set the value of - /// zero is set to one. This means the first element of the lookup table is - /// zero. + /// Instead of using IntegerLogBase2 it uses __countl_zero. static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) { - auto __t = (32 - std::__libcpp_clz(static_cast(__v | 1))) * 1233 >> 12; + auto __t = (32 - std::__countl_zero(static_cast(__v | 1))) * 1233 >> 12; return __t - (__v < __itoa::__pow10_32[__t]) + 1; } @@ -63,19 +59,16 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t -struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t> { +struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t > { using type = uint64_t; /// The width estimation using a log10 algorithm. /// /// The algorithm is based on /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 - /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that - /// function requires its input to have at least one bit set the value of - /// zero is set to one. This means the first element of the lookup table is - /// zero. + /// Instead of using IntegerLogBase2 it uses __countl_zero. static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) { - auto __t = (64 - std::__libcpp_clz(static_cast(__v | 1))) * 1233 >> 12; + auto __t = (64 - std::__countl_zero(static_cast(__v | 1))) * 1233 >> 12; return __t - (__v < __itoa::__pow10_64[__t]) + 1; } @@ -97,15 +90,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t numeric_limits::max(), "The optimizations for this algorithm fail when this isn't true."); // There's always a bit set in the upper 64-bits. - auto __t = (128 - std::__libcpp_clz(static_cast(__v >> 64))) * 1233 >> 12; + auto __t = (128 - std::__countl_zero(static_cast(__v >> 64))) * 1233 >> 12; _LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds"); // __t is adjusted since the lookup table misses the lower entries. 
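// Illustrative aside, not part of the patch: the "* 1233 >> 12" step above is the
// Stanford bit-hacks IntegerLog10 trick. 1233/4096 approximates log10(2), so it turns
// a bit width into a decimal digit count, which the __pow10 table comparison then
// corrects by at most one. A standalone sketch of the same idea, assuming only
// standard <bit> and <cstdint> (names here are hypothetical, not libc++'s):
#include <bit>
#include <cstdint>

constexpr int approx_decimal_width(std::uint32_t v) {
  int bits = 32 - std::countl_zero(v | 1u); // position of the highest set bit (>= 1)
  int t    = (bits * 1233) >> 12;           // roughly floor(bits * log10(2))
  return t + 1;                             // may be one too large; the pow10 table
                                            // lookup in the real code removes the excess
}

static_assert(approx_decimal_width(1) == 1);
static_assert(approx_decimal_width(1000) == 4);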
return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1; @@ -142,7 +132,7 @@ __mul_overflowed(unsigned short __a, _Tp __b, unsigned short& __r) { template inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool __mul_overflowed(_Tp __a, _Tp __b, _Tp& __r) { static_assert(is_unsigned<_Tp>::value, ""); - return __builtin_mul_overflow(__a, __b, &__r); + return __builtin_mul_overflow(__a, __b, std::addressof(__r)); } template @@ -152,7 +142,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX23 __mul_overflowed template struct _LIBCPP_HIDDEN __traits : __traits_base<_Tp> { - static constexpr int digits = numeric_limits<_Tp>::digits10 + 1; + static _LIBCPP_CONSTEXPR const int digits = numeric_limits<_Tp>::digits10 + 1; using __traits_base<_Tp>::__pow; using typename __traits_base<_Tp>::type; @@ -191,8 +181,6 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI _Tp __complement(_Tp return _Tp(~__x + 1); } -#endif // _LIBCPP_STD_VER >= 17 - _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__chrono/convert_to_tm.h b/lib/libcxx/include/__chrono/convert_to_tm.h index 7d06a38d87..817e6747a7 100644 --- a/lib/libcxx/include/__chrono/convert_to_tm.h +++ b/lib/libcxx/include/__chrono/convert_to_tm.h @@ -15,6 +15,7 @@ #include <__chrono/day.h> #include <__chrono/duration.h> #include <__chrono/file_clock.h> +#include <__chrono/gps_clock.h> #include <__chrono/hh_mm_ss.h> #include <__chrono/local_info.h> #include <__chrono/month.h> @@ -23,6 +24,7 @@ #include <__chrono/statically_widen.h> #include <__chrono/sys_info.h> #include <__chrono/system_clock.h> +#include <__chrono/tai_clock.h> #include <__chrono/time_point.h> #include <__chrono/utc_clock.h> #include <__chrono/weekday.h> @@ -35,6 +37,7 @@ #include <__config> #include <__format/format_error.h> #include <__memory/addressof.h> +#include <__type_traits/common_type.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_specialization.h> #include @@ -112,6 +115,21 @@ _LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(chrono::utc_time<_Duration> __tp) { return __result; } +template +_LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(chrono::tai_time<_Duration> __tp) { + using _Rp = common_type_t<_Duration, chrono::seconds>; + // The time between the TAI epoch (1958-01-01) and UNIX epoch (1970-01-01). + // This avoids leap second conversion when going from TAI to UTC. + // (It also avoids issues when the date is before the UTC epoch.) 
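// Illustrative aside, not part of the patch: 1958-01-01 to 1970-01-01 spans twelve
// years, three of them leap years (1960, 1964, 1968), i.e. 9 * 365 + 3 * 366 = 4383
// days. A standalone check of the constant used below, assuming C++20 <chrono>:
#include <chrono>

namespace tai_offset_check {
using namespace std::chrono;
constexpr auto tai_to_unix = sys_days{year{1970} / 1 / 1} - sys_days{year{1958} / 1 / 1};
static_assert(tai_to_unix == days{4383});
static_assert(seconds{4383 * 24 * 60 * 60} == tai_to_unix);
} // namespace tai_offset_check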
+ constexpr chrono::seconds __offset{4383 * 24 * 60 * 60}; + return std::__convert_to_tm<_Tm>(chrono::sys_time<_Rp>{__tp.time_since_epoch() - __offset}); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(chrono::gps_time<_Duration> __tp) { + return std::__convert_to_tm<_Tm>(chrono::utc_clock::to_sys(chrono::gps_clock::to_utc(__tp))); +} + # endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB # endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION @@ -125,20 +143,16 @@ _LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(const _ChronoT& __value) { # endif if constexpr (__is_time_point<_ChronoT>) { - if constexpr (same_as) - return std::__convert_to_tm<_Tm>(__value); -# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION -# if _LIBCPP_HAS_EXPERIMENTAL_TZDB - else if constexpr (same_as) - return std::__convert_to_tm<_Tm>(__value); -# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB -# endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION - else if constexpr (same_as) + if constexpr (same_as) return std::__convert_to_tm<_Tm>(_ChronoT::clock::to_sys(__value)); else if constexpr (same_as) return std::__convert_to_tm<_Tm>(chrono::sys_time{__value.time_since_epoch()}); - else + else { + // Note that some clocks have specializations __convert_to_tm for their + // time_point. These don't need to be added here. They do not trigger + // this assert. static_assert(sizeof(_ChronoT) == 0, "TODO: Add the missing clock specialization"); + } } else if constexpr (chrono::__is_duration_v<_ChronoT>) { // [time.format]/6 // ... However, if a flag refers to a "time of day" (e.g. %H, %I, %p, diff --git a/lib/libcxx/include/__chrono/duration.h b/lib/libcxx/include/__chrono/duration.h index 941aca6009..57fa64d650 100644 --- a/lib/libcxx/include/__chrono/duration.h +++ b/lib/libcxx/include/__chrono/duration.h @@ -32,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace chrono { template > -class _LIBCPP_TEMPLATE_VIS duration; +class duration; template inline const bool __is_duration_v = false; @@ -52,7 +52,7 @@ inline const bool __is_duration_v > = tru } // namespace chrono template -struct _LIBCPP_TEMPLATE_VIS common_type, chrono::duration<_Rep2, _Period2> > { +struct common_type, chrono::duration<_Rep2, _Period2> > { typedef chrono::duration::type, __ratio_gcd<_Period1, _Period2> > type; }; @@ -107,7 +107,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration duration_cast(const d } template -struct _LIBCPP_TEMPLATE_VIS treat_as_floating_point : is_floating_point<_Rep> {}; +struct treat_as_floating_point : is_floating_point<_Rep> {}; #if _LIBCPP_STD_VER >= 17 template @@ -115,7 +115,7 @@ inline constexpr bool treat_as_floating_point_v = treat_as_floating_point<_Rep>: #endif template -struct _LIBCPP_TEMPLATE_VIS duration_values { +struct duration_values { public: _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep zero() _NOEXCEPT { return _Rep(0); } _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR _Rep max() _NOEXCEPT { return numeric_limits<_Rep>::max(); } @@ -156,7 +156,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration round(const duration< // duration template -class _LIBCPP_TEMPLATE_VIS duration { +class duration { static_assert(!__is_duration_v<_Rep>, "A duration representation can not be a duration"); static_assert(__is_ratio_v<_Period>, "Second template parameter of duration must be a std::ratio"); static_assert(_Period::num > 0, "duration period must be positive"); diff --git 
a/lib/libcxx/include/__chrono/formatter.h b/lib/libcxx/include/__chrono/formatter.h index d17acd274e..49758397f6 100644 --- a/lib/libcxx/include/__chrono/formatter.h +++ b/lib/libcxx/include/__chrono/formatter.h @@ -21,6 +21,7 @@ # include <__chrono/day.h> # include <__chrono/duration.h> # include <__chrono/file_clock.h> +# include <__chrono/gps_clock.h> # include <__chrono/hh_mm_ss.h> # include <__chrono/local_info.h> # include <__chrono/month.h> @@ -31,6 +32,7 @@ # include <__chrono/statically_widen.h> # include <__chrono/sys_info.h> # include <__chrono/system_clock.h> +# include <__chrono/tai_clock.h> # include <__chrono/time_point.h> # include <__chrono/utc_clock.h> # include <__chrono/weekday.h> @@ -48,12 +50,14 @@ # include <__format/formatter.h> # include <__format/parser_std_format_spec.h> # include <__format/write_escaped.h> +# include <__iterator/istreambuf_iterator.h> +# include <__iterator/ostreambuf_iterator.h> +# include <__locale_dir/time.h> # include <__memory/addressof.h> # include <__type_traits/is_specialization.h> # include # include # include -# include # include # include @@ -232,9 +236,13 @@ _LIBCPP_HIDE_FROM_ABI __time_zone __convert_to_time_zone([[maybe_unused]] const if constexpr (same_as<_Tp, chrono::sys_info>) return {__value.abbrev, __value.offset}; # if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM + else if constexpr (__is_time_point<_Tp> && requires { requires same_as; }) + return {"TAI", chrono::seconds{0}}; + else if constexpr (__is_time_point<_Tp> && requires { requires same_as; }) + return {"GPS", chrono::seconds{0}}; else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) return __formatter::__convert_to_time_zone(__value.get_info()); -# endif +# endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM else # endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB return {"UTC", chrono::seconds{0}}; @@ -312,7 +320,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( case _CharT('T'): __facet.put( {__sstr}, __sstr, _CharT(' '), std::addressof(__t), std::to_address(__s), std::to_address(__it + 1)); - if constexpr (__use_fraction<_Tp>()) + if constexpr (__formatter::__use_fraction<_Tp>()) __formatter::__format_sub_seconds(__sstr, __value); break; @@ -375,7 +383,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( break; case _CharT('O'): - if constexpr (__use_fraction<_Tp>()) { + if constexpr (__formatter::__use_fraction<_Tp>()) { // Handle OS using the normal representation for the non-fractional // part. There seems to be no locale information regarding how the // fractional part should be formatted. 
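// Illustrative usage sketch, not part of the patch: with the formatter specializations
// this file adds for tai_time and gps_time (and the experimental tzdb enabled, since
// both clocks live behind _LIBCPP_HAS_EXPERIMENTAL_TZDB), TAI and GPS time points
// format like the other chrono clocks. Assumes C++20 <chrono> and <format>; the
// function name is hypothetical.
#include <chrono>
#include <format>
#include <string>

inline std::string example_format_tai_and_gps() {
  using namespace std::chrono;
  auto tai = tai_clock::now();
  auto gps = gps_clock::now();
  // %F %T prints an ISO date followed by the time of day, including sub-seconds.
  return std::format("{:%F %T} | {:%F %T}", tai, gps);
}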
@@ -692,7 +700,7 @@ __format_chrono(const _Tp& __value, } // namespace __formatter template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS __formatter_chrono { +struct __formatter_chrono { public: template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator @@ -710,7 +718,7 @@ public: }; template -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { +struct formatter, _CharT> : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -724,7 +732,29 @@ public: # if _LIBCPP_HAS_EXPERIMENTAL_TZDB template -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { +struct formatter, _CharT> : public __formatter_chrono<_CharT> { +public: + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; + + template + _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { + return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags::__clock); + } +}; + +template +struct formatter, _CharT> : public __formatter_chrono<_CharT> { +public: + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; + + template + _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { + return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags::__clock); + } +}; + +template +struct formatter, _CharT> : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -738,7 +768,7 @@ public: # endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM template -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { +struct formatter, _CharT> : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -749,7 +779,7 @@ public: }; template -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { +struct formatter, _CharT> : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -783,7 +813,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -794,7 +824,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -805,7 +835,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -816,7 +846,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -827,7 +857,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -838,7 +868,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : 
public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -849,7 +879,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -860,7 +890,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -871,7 +901,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -882,7 +912,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -893,7 +923,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -904,7 +934,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -915,7 +945,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -926,7 +956,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; @@ -937,7 +967,7 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { +struct formatter : public __formatter_chrono<_CharT> { public: using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; diff --git a/lib/libcxx/include/__chrono/gps_clock.h b/lib/libcxx/include/__chrono/gps_clock.h new file mode 100644 index 0000000000..2e220cab94 --- /dev/null +++ b/lib/libcxx/include/__chrono/gps_clock.h @@ -0,0 +1,90 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CHRONO_GPS_CLOCK_H +#define _LIBCPP___CHRONO_GPS_CLOCK_H + +#include +// Enable the contents of the header only when libc++ was built with experimental features enabled. 
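// Illustrative aside, not part of the patch: the 315964809 s constant used below is
// the UTC time_since_epoch() of the GPS epoch (1980-01-06 00:00:00 UTC), i.e. 3657
// days after the UNIX epoch plus the 9 leap seconds inserted by that date. A
// standalone runtime check, assuming C++20 <chrono> with utc_clock available; the
// function name is hypothetical:
#include <cassert>
#include <chrono>

inline void gps_offset_check() {
  using namespace std::chrono;
  auto gps_epoch_utc = utc_clock::from_sys(sys_days{year{1980} / 1 / 6});
  assert(gps_epoch_utc.time_since_epoch() == seconds{315'964'809});
}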
+#if _LIBCPP_HAS_EXPERIMENTAL_TZDB + +# include <__assert> +# include <__chrono/duration.h> +# include <__chrono/time_point.h> +# include <__chrono/utc_clock.h> +# include <__config> +# include <__type_traits/common_type.h> + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION + +namespace chrono { + +class gps_clock; + +template +using gps_time = time_point; +using gps_seconds = gps_time; + +class gps_clock { +public: + using rep = utc_clock::rep; + using period = utc_clock::period; + using duration = chrono::duration; + using time_point = chrono::time_point; + static constexpr bool is_steady = false; // The utc_clock is not steady. + + // The static difference between UTC and GPS time as specified in the Standard. + static constexpr chrono::seconds __offset{315964809}; + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static time_point now() { return from_utc(utc_clock::now()); } + + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static utc_time> + to_utc(const gps_time<_Duration>& __time) noexcept { + using _Rp = common_type_t<_Duration, seconds>; + _Duration __time_since_epoch = __time.time_since_epoch(); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__time_since_epoch >= utc_time<_Rp>::min().time_since_epoch() + __offset, + "the GPS to UTC conversion would underflow"); + + return utc_time<_Rp>{__time_since_epoch + __offset}; + } + + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static gps_time> + from_utc(const utc_time<_Duration>& __time) noexcept { + using _Rp = common_type_t<_Duration, seconds>; + _Duration __time_since_epoch = __time.time_since_epoch(); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__time_since_epoch <= utc_time<_Rp>::max().time_since_epoch() - __offset, + "the UTC to GPS conversion would overflow"); + + return gps_time<_Rp>{__time_since_epoch - __offset}; + } +}; + +} // namespace chrono + +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB + +#endif // _LIBCPP___CHRONO_GPS_CLOCK_H diff --git a/lib/libcxx/include/__chrono/ostream.h b/lib/libcxx/include/__chrono/ostream.h index ed9ad8e346..7a01b18678 100644 --- a/lib/libcxx/include/__chrono/ostream.h +++ b/lib/libcxx/include/__chrono/ostream.h @@ -18,6 +18,7 @@ # include <__chrono/day.h> # include <__chrono/duration.h> # include <__chrono/file_clock.h> +# include <__chrono/gps_clock.h> # include <__chrono/hh_mm_ss.h> # include <__chrono/local_info.h> # include <__chrono/month.h> @@ -26,6 +27,7 @@ # include <__chrono/statically_widen.h> # include <__chrono/sys_info.h> # include <__chrono/system_clock.h> +# include <__chrono/tai_clock.h> # include <__chrono/utc_clock.h> # include <__chrono/weekday.h> # include <__chrono/year.h> @@ -71,6 +73,18 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const utc_time<_Duration>& __tp return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%F %T}"), __tp); } +template +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, const tai_time<_Duration>& __tp) { + return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%F %T}"), __tp); +} + +template +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& 
+operator<<(basic_ostream<_CharT, _Traits>& __os, const gps_time<_Duration>& __tp) { + return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%F %T}"), __tp); +} + # endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB # endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM diff --git a/lib/libcxx/include/__chrono/parser_std_format_spec.h b/lib/libcxx/include/__chrono/parser_std_format_spec.h index 4df8e603c6..8ecc19f3c7 100644 --- a/lib/libcxx/include/__chrono/parser_std_format_spec.h +++ b/lib/libcxx/include/__chrono/parser_std_format_spec.h @@ -139,7 +139,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __validate_time_zone(__flags __flags) { } template -class _LIBCPP_TEMPLATE_VIS __parser_chrono { +class __parser_chrono { using _ConstIterator _LIBCPP_NODEBUG = typename basic_format_parse_context<_CharT>::const_iterator; public: diff --git a/lib/libcxx/include/__chrono/tai_clock.h b/lib/libcxx/include/__chrono/tai_clock.h new file mode 100644 index 0000000000..14c8b70a94 --- /dev/null +++ b/lib/libcxx/include/__chrono/tai_clock.h @@ -0,0 +1,108 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CHRONO_TAI_CLOCK_H +#define _LIBCPP___CHRONO_TAI_CLOCK_H + +#include +// Enable the contents of the header only when libc++ was built with experimental features enabled. +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB + +# include <__assert> +# include <__chrono/duration.h> +# include <__chrono/time_point.h> +# include <__chrono/utc_clock.h> +# include <__config> +# include <__type_traits/common_type.h> + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION + +namespace chrono { + +class tai_clock; + +template +using tai_time = time_point; +using tai_seconds = tai_time; + +// [time.clock.tai.overview]/1 +// The clock tai_clock measures seconds since 1958-01-01 00:00:00 and is +// offset 10s ahead of UTC at this date. That is, 1958-01-01 00:00:00 TAI is +// equivalent to 1957-12-31 23:59:50 UTC. Leap seconds are not inserted into +// TAI. Therefore every time a leap second is inserted into UTC, UTC shifts +// another second with respect to TAI. For example by 2000-01-01 there had +// been 22 positive and 0 negative leap seconds inserted so 2000-01-01 +// 00:00:00 UTC is equivalent to 2000-01-01 00:00:32 TAI (22s plus the +// initial 10s offset). +// +// Note this does not specify what the UTC offset before 1958-01-01 00:00:00 +// TAI is, nor does it follow the "real" TAI clock between 1958-01-01 and the +// start of the UTC epoch. So while the member functions are fully specified in +// the standard, they do not technically follow the "real-world" TAI clock with +// 100% accuracy. +// +// https://koka-lang.github.io/koka/doc/std_time_utc.html contains more +// information and references. 
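// Illustrative usage sketch, not part of the patch: what the overview above implies
// for the conversion functions defined below. The 32 s difference at 2000-01-01 is
// the initial 10 s offset plus the 22 positive leap seconds inserted by that date.
// Assumes C++20 <chrono> with utc_clock available (experimental tzdb in libc++); the
// function name is hypothetical.
#include <cassert>
#include <chrono>

inline void tai_overview_example() {
  using namespace std::chrono;
  utc_seconds u = clock_cast<utc_clock>(sys_days{year{2000} / 1 / 1});
  tai_seconds t = tai_clock::from_utc(u); // reads 2000-01-01 00:00:32 TAI
  assert(tai_clock::to_utc(t) == u);      // from_utc/to_utc round-trip exactly
}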
+class tai_clock { +public: + using rep = utc_clock::rep; + using period = utc_clock::period; + using duration = chrono::duration; + using time_point = chrono::time_point; + static constexpr bool is_steady = false; // The utc_clock is not steady. + + // The static difference between UTC and TAI time. + static constexpr chrono::seconds __offset{378691210}; + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static time_point now() { return from_utc(utc_clock::now()); } + + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static utc_time> + to_utc(const tai_time<_Duration>& __time) noexcept { + using _Rp = common_type_t<_Duration, seconds>; + _Duration __time_since_epoch = __time.time_since_epoch(); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__time_since_epoch >= utc_time<_Rp>::min().time_since_epoch() + __offset, + "the TAI to UTC conversion would underflow"); + + return utc_time<_Rp>{__time_since_epoch - __offset}; + } + + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static tai_time> + from_utc(const utc_time<_Duration>& __time) noexcept { + using _Rp = common_type_t<_Duration, seconds>; + _Duration __time_since_epoch = __time.time_since_epoch(); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__time_since_epoch <= utc_time<_Rp>::max().time_since_epoch() - __offset, + "the UTC to TAI conversion would overflow"); + + return tai_time<_Rp>{__time_since_epoch + __offset}; + } +}; + +} // namespace chrono + +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB + +#endif // _LIBCPP___CHRONO_TAI_CLOCK_H diff --git a/lib/libcxx/include/__chrono/time_point.h b/lib/libcxx/include/__chrono/time_point.h index 5e79fa5d25..fc4408d23d 100644 --- a/lib/libcxx/include/__chrono/time_point.h +++ b/lib/libcxx/include/__chrono/time_point.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace chrono { template -class _LIBCPP_TEMPLATE_VIS time_point { +class time_point { static_assert(__is_duration_v<_Duration>, "Second template parameter of time_point must be a std::chrono::duration"); public: @@ -58,6 +58,19 @@ public: // arithmetic +#if _LIBCPP_STD_VER >= 20 + _LIBCPP_HIDE_FROM_ABI constexpr time_point& operator++() { + ++__d_; + return *this; + } + _LIBCPP_HIDE_FROM_ABI constexpr time_point operator++(int) { return time_point{__d_++}; } + _LIBCPP_HIDE_FROM_ABI constexpr time_point& operator--() { + --__d_; + return *this; + } + _LIBCPP_HIDE_FROM_ABI constexpr time_point operator--(int) { return time_point{__d_--}; } +#endif // _LIBCPP_STD_VER >= 20 + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 time_point& operator+=(const duration& __d) { __d_ += __d; return *this; @@ -76,8 +89,7 @@ public: } // namespace chrono template -struct _LIBCPP_TEMPLATE_VIS -common_type, chrono::time_point<_Clock, _Duration2> > { +struct common_type, chrono::time_point<_Clock, _Duration2> > { typedef chrono::time_point<_Clock, typename common_type<_Duration1, _Duration2>::type> type; }; diff --git a/lib/libcxx/include/__compare/common_comparison_category.h b/lib/libcxx/include/__compare/common_comparison_category.h index 215922abad..eae2ef0062 100644 --- a/lib/libcxx/include/__compare/common_comparison_category.h +++ b/lib/libcxx/include/__compare/common_comparison_category.h @@ -55,7 +55,7 @@ __compute_comp_type(const _ClassifyCompCategory (&__types)[_Size]) { template _LIBCPP_HIDE_FROM_ABI constexpr auto __get_comp_type() { using _CCC = _ClassifyCompCategory; - constexpr _CCC 
__type_kinds[] = {_StrongOrd, __type_to_enum<_Ts>()...}; + constexpr _CCC __type_kinds[] = {_StrongOrd, __comp_detail::__type_to_enum<_Ts>()...}; constexpr _CCC __cat = __comp_detail::__compute_comp_type(__type_kinds); if constexpr (__cat == _None) return void(); @@ -72,8 +72,8 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __get_comp_type() { // [cmp.common], common comparison category type template -struct _LIBCPP_TEMPLATE_VIS common_comparison_category { - using type = decltype(__comp_detail::__get_comp_type<_Ts...>()); +struct common_comparison_category { + using type _LIBCPP_NODEBUG = decltype(__comp_detail::__get_comp_type<_Ts...>()); }; template diff --git a/lib/libcxx/include/__compare/compare_three_way.h b/lib/libcxx/include/__compare/compare_three_way.h index 01c12076c0..852c5874c6 100644 --- a/lib/libcxx/include/__compare/compare_three_way.h +++ b/lib/libcxx/include/__compare/compare_three_way.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 -struct _LIBCPP_TEMPLATE_VIS compare_three_way { +struct compare_three_way { template requires three_way_comparable_with<_T1, _T2> constexpr _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const diff --git a/lib/libcxx/include/__compare/compare_three_way_result.h b/lib/libcxx/include/__compare/compare_three_way_result.h index 6ee2eff003..264a4d603c 100644 --- a/lib/libcxx/include/__compare/compare_three_way_result.h +++ b/lib/libcxx/include/__compare/compare_three_way_result.h @@ -29,12 +29,12 @@ struct _LIBCPP_HIDE_FROM_ABI __compare_three_way_result< _Tp, _Up, decltype(std::declval<__make_const_lvalue_ref<_Tp>>() <=> std::declval<__make_const_lvalue_ref<_Up>>(), void())> { - using type = decltype(std::declval<__make_const_lvalue_ref<_Tp>>() <=> std::declval<__make_const_lvalue_ref<_Up>>()); + using type _LIBCPP_NODEBUG = + decltype(std::declval<__make_const_lvalue_ref<_Tp>>() <=> std::declval<__make_const_lvalue_ref<_Up>>()); }; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_NO_SPECIALIZATIONS compare_three_way_result - : __compare_three_way_result<_Tp, _Up, void> {}; +struct _LIBCPP_NO_SPECIALIZATIONS compare_three_way_result : __compare_three_way_result<_Tp, _Up, void> {}; template using compare_three_way_result_t = typename compare_three_way_result<_Tp, _Up>::type; diff --git a/lib/libcxx/include/__concepts/arithmetic.h b/lib/libcxx/include/__concepts/arithmetic.h index 0c44f11780..64c0200783 100644 --- a/lib/libcxx/include/__concepts/arithmetic.h +++ b/lib/libcxx/include/__concepts/arithmetic.h @@ -13,8 +13,6 @@ #include <__type_traits/is_floating_point.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_signed.h> -#include <__type_traits/is_signed_integer.h> -#include <__type_traits/is_unsigned_integer.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -38,17 +36,6 @@ concept unsigned_integral = integral<_Tp> && !signed_integral<_Tp>; template concept floating_point = is_floating_point_v<_Tp>; -// Concept helpers for the internal type traits for the fundamental types. 
- -template -concept __libcpp_unsigned_integer = __libcpp_is_unsigned_integer<_Tp>::value; - -template -concept __libcpp_signed_integer = __libcpp_is_signed_integer<_Tp>::value; - -template -concept __libcpp_integer = __libcpp_unsigned_integer<_Tp> || __libcpp_signed_integer<_Tp>; - #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__concepts/class_or_enum.h b/lib/libcxx/include/__concepts/class_or_enum.h index 2739e31e14..9fceed034c 100644 --- a/lib/libcxx/include/__concepts/class_or_enum.h +++ b/lib/libcxx/include/__concepts/class_or_enum.h @@ -13,7 +13,6 @@ #include <__type_traits/is_class.h> #include <__type_traits/is_enum.h> #include <__type_traits/is_union.h> -#include <__type_traits/remove_cvref.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__concepts/common_with.h b/lib/libcxx/include/__concepts/common_with.h index 85abb05efb..09082b2295 100644 --- a/lib/libcxx/include/__concepts/common_with.h +++ b/lib/libcxx/include/__concepts/common_with.h @@ -12,7 +12,7 @@ #include <__concepts/common_reference_with.h> #include <__concepts/same_as.h> #include <__config> -#include <__type_traits/add_lvalue_reference.h> +#include <__type_traits/add_reference.h> #include <__type_traits/common_reference.h> #include <__type_traits/common_type.h> #include <__utility/declval.h> diff --git a/lib/libcxx/include/__concepts/swappable.h b/lib/libcxx/include/__concepts/swappable.h index 985c733021..c691d3ab1e 100644 --- a/lib/libcxx/include/__concepts/swappable.h +++ b/lib/libcxx/include/__concepts/swappable.h @@ -22,7 +22,6 @@ #include <__utility/exchange.h> #include <__utility/forward.h> #include <__utility/move.h> -#include <__utility/swap.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__condition_variable/condition_variable.h b/lib/libcxx/include/__condition_variable/condition_variable.h index 4521fe2746..1e8edd5dcb 100644 --- a/lib/libcxx/include/__condition_variable/condition_variable.h +++ b/lib/libcxx/include/__condition_variable/condition_variable.h @@ -39,60 +39,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_DECLARE_STRONG_ENUM(cv_status){no_timeout, timeout}; _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(cv_status) -class _LIBCPP_EXPORTED_FROM_ABI condition_variable { - __libcpp_condvar_t __cv_ = _LIBCPP_CONDVAR_INITIALIZER; - -public: - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; - -# if _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION - ~condition_variable() = default; -# else - ~condition_variable(); -# endif - - condition_variable(const condition_variable&) = delete; - condition_variable& operator=(const condition_variable&) = delete; - - void notify_one() _NOEXCEPT; - void notify_all() _NOEXCEPT; - - void wait(unique_lock& __lk) _NOEXCEPT; - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS void wait(unique_lock& __lk, _Predicate __pred); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS cv_status - wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS bool - wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t, _Predicate __pred); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS cv_status - wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d); - - template - bool _LIBCPP_HIDE_FROM_ABI - wait_for(unique_lock& __lk, const 
chrono::duration<_Rep, _Period>& __d, _Predicate __pred); - - typedef __libcpp_condvar_t* native_handle_type; - _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; } - -private: - void - __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; -# if _LIBCPP_HAS_COND_CLOCKWAIT - _LIBCPP_HIDE_FROM_ABI void - __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; -# endif - template - _LIBCPP_HIDE_FROM_ABI void - __do_timed_wait(unique_lock& __lk, chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT; -}; -#endif // _LIBCPP_HAS_THREADS - template ::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI chrono::nanoseconds __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) { using namespace chrono; @@ -140,64 +86,106 @@ inline _LIBCPP_HIDE_FROM_ABI chrono::nanoseconds __safe_nanosecond_cast(chrono:: return nanoseconds(__result); } -#if _LIBCPP_HAS_THREADS -template -void condition_variable::wait(unique_lock& __lk, _Predicate __pred) { - while (!__pred()) - wait(__lk); -} +class _LIBCPP_EXPORTED_FROM_ABI condition_variable { + __libcpp_condvar_t __cv_ = _LIBCPP_CONDVAR_INITIALIZER; -template -cv_status condition_variable::wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t) { - using namespace chrono; - using __clock_tp_ns = time_point<_Clock, nanoseconds>; +public: + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; - typename _Clock::time_point __now = _Clock::now(); - if (__t <= __now) - return cv_status::timeout; - - __clock_tp_ns __t_ns = __clock_tp_ns(std::__safe_nanosecond_cast(__t.time_since_epoch())); - - __do_timed_wait(__lk, __t_ns); - return _Clock::now() < __t ? cv_status::no_timeout : cv_status::timeout; -} - -template -bool condition_variable::wait_until( - unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t, _Predicate __pred) { - while (!__pred()) { - if (wait_until(__lk, __t) == cv_status::timeout) - return __pred(); - } - return true; -} - -template -cv_status condition_variable::wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d) { - using namespace chrono; - if (__d <= __d.zero()) - return cv_status::timeout; - using __ns_rep = nanoseconds::rep; - steady_clock::time_point __c_now = steady_clock::now(); - -# if _LIBCPP_HAS_COND_CLOCKWAIT - using __clock_tp_ns = time_point; - __ns_rep __now_count_ns = std::__safe_nanosecond_cast(__c_now.time_since_epoch()).count(); +# if _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION + ~condition_variable() = default; # else - using __clock_tp_ns = time_point; - __ns_rep __now_count_ns = std::__safe_nanosecond_cast(system_clock::now().time_since_epoch()).count(); + ~condition_variable(); # endif - __ns_rep __d_ns_count = std::__safe_nanosecond_cast(__d).count(); + condition_variable(const condition_variable&) = delete; + condition_variable& operator=(const condition_variable&) = delete; - if (__now_count_ns > numeric_limits<__ns_rep>::max() - __d_ns_count) { - __do_timed_wait(__lk, __clock_tp_ns::max()); - } else { - __do_timed_wait(__lk, __clock_tp_ns(nanoseconds(__now_count_ns + __d_ns_count))); + void notify_one() _NOEXCEPT; + void notify_all() _NOEXCEPT; + + void wait(unique_lock& __lk) _NOEXCEPT; + + template + _LIBCPP_HIDE_FROM_ABI void wait(unique_lock& __lk, _Predicate __pred) { + while (!__pred()) + wait(__lk); } - return steady_clock::now() - __c_now < __d ? 
cv_status::no_timeout : cv_status::timeout; -} + template + _LIBCPP_HIDE_FROM_ABI cv_status + wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t) { + using namespace chrono; + using __clock_tp_ns = time_point<_Clock, nanoseconds>; + + typename _Clock::time_point __now = _Clock::now(); + if (__t <= __now) + return cv_status::timeout; + + __clock_tp_ns __t_ns = __clock_tp_ns(std::__safe_nanosecond_cast(__t.time_since_epoch())); + + __do_timed_wait(__lk, __t_ns); + return _Clock::now() < __t ? cv_status::no_timeout : cv_status::timeout; + } + + template + _LIBCPP_HIDE_FROM_ABI bool + wait_until(unique_lock& __lk, const chrono::time_point<_Clock, _Duration>& __t, _Predicate __pred) { + while (!__pred()) { + if (wait_until(__lk, __t) == cv_status::timeout) + return __pred(); + } + return true; + } + + template + _LIBCPP_HIDE_FROM_ABI cv_status wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d) { + using namespace chrono; + if (__d <= __d.zero()) + return cv_status::timeout; + using __ns_rep = nanoseconds::rep; + steady_clock::time_point __c_now = steady_clock::now(); + +# if _LIBCPP_HAS_COND_CLOCKWAIT + using __clock_tp_ns = time_point; + __ns_rep __now_count_ns = std::__safe_nanosecond_cast(__c_now.time_since_epoch()).count(); +# else + using __clock_tp_ns = time_point; + __ns_rep __now_count_ns = std::__safe_nanosecond_cast(system_clock::now().time_since_epoch()).count(); +# endif + + __ns_rep __d_ns_count = std::__safe_nanosecond_cast(__d).count(); + + if (__now_count_ns > numeric_limits<__ns_rep>::max() - __d_ns_count) { + __do_timed_wait(__lk, __clock_tp_ns::max()); + } else { + __do_timed_wait(__lk, __clock_tp_ns(nanoseconds(__now_count_ns + __d_ns_count))); + } + + return steady_clock::now() - __c_now < __d ? 
cv_status::no_timeout : cv_status::timeout; + } + + template + bool _LIBCPP_HIDE_FROM_ABI + wait_for(unique_lock& __lk, const chrono::duration<_Rep, _Period>& __d, _Predicate __pred); + + typedef __libcpp_condvar_t* native_handle_type; + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; } + +private: + void + __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; +# if _LIBCPP_HAS_COND_CLOCKWAIT + _LIBCPP_HIDE_FROM_ABI void + __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; +# endif + template + _LIBCPP_HIDE_FROM_ABI void + __do_timed_wait(unique_lock& __lk, chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT; +}; +#endif // _LIBCPP_HAS_THREADS + +#if _LIBCPP_HAS_THREADS template inline bool @@ -210,7 +198,7 @@ inline void condition_variable::__do_timed_wait( unique_lock& __lk, chrono::time_point __tp) _NOEXCEPT { using namespace chrono; if (!__lk.owns_lock()) - __throw_system_error(EPERM, "condition_variable::timed wait: mutex not locked"); + std::__throw_system_error(EPERM, "condition_variable::timed wait: mutex not locked"); nanoseconds __d = __tp.time_since_epoch(); timespec __ts; seconds __s = duration_cast(__d); @@ -225,7 +213,7 @@ inline void condition_variable::__do_timed_wait( } int __ec = pthread_cond_clockwait(&__cv_, __lk.mutex()->native_handle(), CLOCK_MONOTONIC, &__ts); if (__ec != 0 && __ec != ETIMEDOUT) - __throw_system_error(__ec, "condition_variable timed_wait failed"); + std::__throw_system_error(__ec, "condition_variable timed_wait failed"); } # endif // _LIBCPP_HAS_COND_CLOCKWAIT diff --git a/lib/libcxx/include/__config b/lib/libcxx/include/__config index fec323f812..41cb6ac3b6 100644 --- a/lib/libcxx/include/__config +++ b/lib/libcxx/include/__config @@ -28,7 +28,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 200100 +# define _LIBCPP_VERSION 210100 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) @@ -38,11 +38,47 @@ # define _LIBCPP_FREESTANDING # endif +// NOLINTNEXTLINE(libcpp-cpp-version-check) +# if __cplusplus < 201103L +# define _LIBCPP_CXX03_LANG +# endif + +# if __has_feature(experimental_library) +# ifndef _LIBCPP_ENABLE_EXPERIMENTAL +# define _LIBCPP_ENABLE_EXPERIMENTAL +# endif +# endif + +// Incomplete features get their own specific disabling flags. This makes it +// easier to grep for target specific flags once the feature is complete. +# if defined(_LIBCPP_ENABLE_EXPERIMENTAL) || defined(_LIBCPP_BUILDING_LIBRARY) +# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 1 +# else +# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 0 +# endif + +# define _LIBCPP_HAS_EXPERIMENTAL_PSTL _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +# define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +# define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +# define _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC _LIBCPP_HAS_EXPERIMENTAL_LIBRARY + // HARDENING { -// TODO: Remove in LLVM 21. We're making this an error to catch folks who might not have migrated. -# ifdef _LIBCPP_ENABLE_ASSERTIONS -# error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE instead" +// TODO(LLVM 23): Remove this. We're making these an error to catch folks who might not have migrated. 
+// Since hardening went through several changes (many of which impacted user-facing macros), +// we're keeping these checks around for a bit longer than usual. Failure to properly configure +// hardening results in checks being dropped silently, which is a pretty big deal. +# if defined(_LIBCPP_ENABLE_ASSERTIONS) +# error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" +# endif +# if defined(_LIBCPP_ENABLE_HARDENED_MODE) +# error "_LIBCPP_ENABLE_HARDENED_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" +# endif +# if defined(_LIBCPP_ENABLE_SAFE_MODE) +# error "_LIBCPP_ENABLE_SAFE_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" +# endif +# if defined(_LIBCPP_ENABLE_DEBUG_MODE) +# error "_LIBCPP_ENABLE_DEBUG_MODE has been removed, please use _LIBCPP_HARDENING_MODE= instead (see docs)" # endif // The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values: @@ -147,16 +183,53 @@ _LIBCPP_HARDENING_MODE_EXTENSIVE, \ _LIBCPP_HARDENING_MODE_DEBUG # endif +// Hardening assertion semantics generally mirror the evaluation semantics of C++26 Contracts: +// - `ignore` evaluates the assertion but doesn't do anything if it fails (note that it differs from the Contracts +// `ignore` semantic which wouldn't evaluate the assertion at all); +// - `observe` logs an error (indicating, if possible, that the error is fatal) and continues execution; +// - `quick-enforce` terminates the program as fast as possible (via trapping); +// - `enforce` logs an error and then terminates the program. +// +// Notes: +// - Continuing execution after a hardening check fails results in undefined behavior; the `observe` semantic is meant +// to make adopting hardening easier but should not be used outside of this scenario; +// - C++26 wording for Library Hardening precludes a conforming Hardened implementation from using the Contracts +// `ignore` semantic when evaluating hardened preconditions in the Library. Libc++ allows using this semantic for +// hardened preconditions, however, be aware that using `ignore` does not produce a conforming "Hardened" +// implementation, unlike the other semantics above. +// clang-format off +# define _LIBCPP_ASSERTION_SEMANTIC_IGNORE (1 << 1) +# define _LIBCPP_ASSERTION_SEMANTIC_OBSERVE (1 << 2) +# define _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE (1 << 3) +# define _LIBCPP_ASSERTION_SEMANTIC_ENFORCE (1 << 4) +// clang-format on + +// Allow users to define an arbitrary assertion semantic; otherwise, use the default mapping from modes to semantics. +// The default is for production-capable modes to use `quick-enforce` (i.e., trap) and for the `debug` mode to use +// `enforce` (i.e., log and abort). +# ifndef _LIBCPP_ASSERTION_SEMANTIC + +# if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_ENFORCE +# else +# define _LIBCPP_ASSERTION_SEMANTIC _LIBCPP_ASSERTION_SEMANTIC_QUICK_ENFORCE +# endif + +# else +# if !_LIBCPP_HAS_EXPERIMENTAL_LIBRARY +# error "Assertion semantics are an experimental feature." +# endif +# if defined(_LIBCPP_CXX03_LANG) +# error "Assertion semantics are not available in the C++03 mode." 
+# endif + +# endif // _LIBCPP_ASSERTION_SEMANTIC + // } HARDENING # define _LIBCPP_TOSTRING2(x) #x # define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x) -// NOLINTNEXTLINE(libcpp-cpp-version-check) -# if __cplusplus < 201103L -# define _LIBCPP_CXX03_LANG -# endif - # ifndef __has_constexpr_builtin # define __has_constexpr_builtin(x) 0 # endif @@ -190,24 +263,6 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_ABI_VCRUNTIME # endif -# if __has_feature(experimental_library) -# ifndef _LIBCPP_ENABLE_EXPERIMENTAL -# define _LIBCPP_ENABLE_EXPERIMENTAL -# endif -# endif - -// Incomplete features get their own specific disabling flags. This makes it -// easier to grep for target specific flags once the feature is complete. -# if defined(_LIBCPP_ENABLE_EXPERIMENTAL) || defined(_LIBCPP_BUILDING_LIBRARY) -# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 1 -# else -# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 0 -# endif - -# define _LIBCPP_HAS_EXPERIMENTAL_PSTL _LIBCPP_HAS_EXPERIMENTAL_LIBRARY -# define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY -# define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY - # if defined(__MVS__) # include // for __NATIVE_ASCII_F # endif @@ -319,41 +374,14 @@ typedef __char32_t char32_t; # define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) -// Objective-C++ features (opt-in) -# if __has_feature(objc_arc) -# define _LIBCPP_HAS_OBJC_ARC 1 -# else -# define _LIBCPP_HAS_OBJC_ARC 0 -# endif - -# if __has_feature(objc_arc_weak) -# define _LIBCPP_HAS_OBJC_ARC_WEAK 1 -# else -# define _LIBCPP_HAS_OBJC_ARC_WEAK 0 -# endif - -# if __has_extension(blocks) -# define _LIBCPP_HAS_EXTENSION_BLOCKS 1 -# else -# define _LIBCPP_HAS_EXTENSION_BLOCKS 0 -# endif - -# if _LIBCPP_HAS_EXTENSION_BLOCKS && defined(__APPLE__) +# if __has_extension(blocks) && defined(__APPLE__) # define _LIBCPP_HAS_BLOCKS_RUNTIME 1 # else # define _LIBCPP_HAS_BLOCKS_RUNTIME 0 # endif -# if __has_feature(address_sanitizer) -# define _LIBCPP_HAS_ASAN 1 -# else -# define _LIBCPP_HAS_ASAN 0 -# endif - # define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__)) -# define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__ - # if defined(_LIBCPP_OBJECT_FORMAT_COFF) # ifdef _DLL @@ -363,35 +391,30 @@ typedef __char32_t char32_t; # endif # if defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) || (defined(__MINGW32__) && !defined(_LIBCPP_BUILDING_LIBRARY)) -# define _LIBCPP_DLL_VIS # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI # elif defined(_LIBCPP_BUILDING_LIBRARY) -# define _LIBCPP_DLL_VIS __declspec(dllexport) # if defined(__MINGW32__) -# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __declspec(dllexport) # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # else # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS -# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __declspec(dllexport) # endif -# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_OVERRIDABLE_FUNC_VIS __declspec(dllexport) # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport) # else -# define _LIBCPP_DLL_VIS __declspec(dllimport) -# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __declspec(dllimport) # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI 
__declspec(dllimport) # endif # define _LIBCPP_HIDDEN -# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS -# define _LIBCPP_TEMPLATE_VIS # define _LIBCPP_TEMPLATE_DATA_VIS -# define _LIBCPP_TYPE_VISIBILITY_DEFAULT +# define _LIBCPP_NAMESPACE_VISIBILITY # else @@ -412,24 +435,12 @@ typedef __char32_t char32_t; # define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default") # endif -# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) -// The inline should be removed once PR32114 is resolved -# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCPP_HIDDEN -# else -# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS -# endif - -// GCC doesn't support the type_visibility attribute, so we have to keep the visibility attribute on templates -# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && !__has_attribute(__type_visibility__) -# define _LIBCPP_TEMPLATE_VIS __attribute__((__visibility__("default"))) -# else -# define _LIBCPP_TEMPLATE_VIS -# endif - # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__) -# define _LIBCPP_TYPE_VISIBILITY_DEFAULT __attribute__((__type_visibility__("default"))) +# define _LIBCPP_NAMESPACE_VISIBILITY __attribute__((__type_visibility__("default"))) +# elif !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCPP_NAMESPACE_VISIBILITY __attribute__((__visibility__("default"))) # else -# define _LIBCPP_TYPE_VISIBILITY_DEFAULT +# define _LIBCPP_NAMESPACE_VISIBILITY # endif # endif // defined(_LIBCPP_OBJECT_FORMAT_COFF) @@ -549,24 +560,17 @@ typedef __char32_t char32_t; # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI # endif -// TODO: Remove this workaround once we drop support for Clang 16 -# if __has_warning("-Wc++23-extensions") -# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++23-extensions") -# else -# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++2b-extensions") -# endif - // Clang modules take a significant compile time hit when pushing and popping diagnostics. -// Since all the headers are marked as system headers in the modulemap, we can simply disable this -// pushing and popping when building with clang modules. -# if !__has_feature(modules) +// Since all the headers are marked as system headers unless _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER is defined, we can +// simply disable this pushing and popping when _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER isn't defined. +# ifdef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER # define _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ _LIBCPP_DIAGNOSTIC_PUSH \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++11-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ - _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION \ + _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++23-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ @@ -577,15 +581,27 @@ typedef __char32_t char32_t; # define _LIBCPP_POP_EXTENSION_DIAGNOSTICS # endif -// Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect. 
// clang-format off -# define _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ - namespace _LIBCPP_TYPE_VISIBILITY_DEFAULT std { \ - inline namespace _LIBCPP_ABI_NAMESPACE { -# define _LIBCPP_END_NAMESPACE_STD }} _LIBCPP_POP_EXTENSION_DIAGNOSTICS -#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL namespace std { namespace experimental { -#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL }} +// The unversioned namespace is used when we want to be ABI compatible with other standard libraries in some way. There +// are two main categories where that's the case: +// - Historically, we have made exception types ABI compatible with libstdc++ to allow throwing them between libstdc++ +// and libc++. This is not used anymore for new exception types, since there is no use-case for it anymore. +// - Types and functions which are used by the compiler are in the unversioned namespace, since the compiler has to know +// their mangling without the appropriate declaration in some cases. +// If it's not clear whether using the unversioned namespace is the correct thing to do, it's not. The versioned +// namespace (_LIBCPP_BEGIN_NAMESPACE_STD) should almost always be used. +# define _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD \ + _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS namespace _LIBCPP_NAMESPACE_VISIBILITY std { + +# define _LIBCPP_END_UNVERSIONED_NAMESPACE_STD } _LIBCPP_POP_EXTENSION_DIAGNOSTICS + +# define _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD inline namespace _LIBCPP_ABI_NAMESPACE { +# define _LIBCPP_END_NAMESPACE_STD } _LIBCPP_END_UNVERSIONED_NAMESPACE_STD + +// TODO: This should really be in the versioned namespace +#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD namespace experimental { +#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL } _LIBCPP_END_UNVERSIONED_NAMESPACE_STD #define _LIBCPP_BEGIN_NAMESPACE_LFTS _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace fundamentals_v1 { #define _LIBCPP_END_NAMESPACE_LFTS } _LIBCPP_END_NAMESPACE_EXPERIMENTAL @@ -663,7 +679,10 @@ typedef __char32_t char32_t; # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) + __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && \ + __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) # define _LIBCPP_HAS_C11_ALIGNED_ALLOC 0 # else # define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1 @@ -675,10 +694,6 @@ typedef __char32_t char32_t; # define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1 # endif -# if defined(__APPLE__) || defined(__FreeBSD__) -# define _LIBCPP_HAS_DEFAULTRUNELOCALE -# endif - # if defined(__APPLE__) || defined(__FreeBSD__) # define _LIBCPP_WCTYPE_IS_MASK # endif @@ -741,8 +756,10 @@ typedef __char32_t char32_t; # if _LIBCPP_STD_VER >= 26 # define _LIBCPP_DEPRECATED_IN_CXX26 _LIBCPP_DEPRECATED +# define _LIBCPP_DEPRECATED_IN_CXX26_(m) _LIBCPP_DEPRECATED_(m) # else # define _LIBCPP_DEPRECATED_IN_CXX26 +# define _LIBCPP_DEPRECATED_IN_CXX26_(m) # endif # if _LIBCPP_HAS_CHAR8_T @@ -937,23 +954,6 @@ typedef __char32_t char32_t; # define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS # endif -// Work around the attribute handling in clang. 
When both __declspec and -// __attribute__ are present, the processing goes awry preventing the definition -// of the types. In MinGW mode, __declspec evaluates to __attribute__, and thus -// combining the two does work. -# if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS) && defined(__clang__) && \ - __has_attribute(acquire_capability) && !defined(_MSC_VER) -# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS 1 -# else -# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS 0 -# endif - -# if _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS -# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) __attribute__((x)) -# else -# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) -# endif - # if _LIBCPP_STD_VER >= 20 # define _LIBCPP_CONSTINIT constinit # elif __has_attribute(__require_constant_initialization__) @@ -1064,9 +1064,8 @@ typedef __char32_t char32_t; # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "") # endif -// TODO(varconst): currently, there are bugs in Clang's intrinsics when handling Objective-C++ `id`, so don't use -// compiler intrinsics in the Objective-C++ mode. -# ifdef __OBJC__ +// TODO(LLVM 22): Remove the workaround +# if defined(__OBJC__) && (!defined(_LIBCPP_CLANG_VER) || _LIBCPP_CLANG_VER < 2001) # define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS # endif @@ -1119,26 +1118,28 @@ typedef __char32_t char32_t; // Optional attributes - these are useful for a better QoI, but not required to be available +# define _LIBCPP_NOALIAS __attribute__((__malloc__)) +# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] +# define _LIBCPP_NO_SANITIZE(...) __attribute__((__no_sanitize__(__VA_ARGS__))) +# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((__init_priority__(100))) +# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \ + __attribute__((__format__(archetype, format_string_index, first_format_arg_index))) +# define _LIBCPP_PACKED __attribute__((__packed__)) + # if __has_attribute(__no_sanitize__) && !defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_NO_CFI __attribute__((__no_sanitize__("cfi"))) # else # define _LIBCPP_NO_CFI # endif -# if __has_attribute(__malloc__) -# define _LIBCPP_NOALIAS __attribute__((__malloc__)) -# else -# define _LIBCPP_NOALIAS -# endif - # if __has_attribute(__using_if_exists__) # define _LIBCPP_USING_IF_EXISTS __attribute__((__using_if_exists__)) # else # define _LIBCPP_USING_IF_EXISTS # endif -# if __has_attribute(__no_destroy__) -# define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__)) +# if __has_cpp_attribute(_Clang::__no_destroy__) +# define _LIBCPP_NO_DESTROY [[_Clang::__no_destroy__]] # else # define _LIBCPP_NO_DESTROY # endif @@ -1149,15 +1150,6 @@ typedef __char32_t char32_t; # define _LIBCPP_DIAGNOSE_WARNING(...) 
# endif -// Use a function like macro to imply that it must be followed by a semicolon -# if __has_cpp_attribute(fallthrough) -# define _LIBCPP_FALLTHROUGH() [[fallthrough]] -# elif __has_attribute(__fallthrough__) -# define _LIBCPP_FALLTHROUGH() __attribute__((__fallthrough__)) -# else -# define _LIBCPP_FALLTHROUGH() ((void)0) -# endif - # if __has_cpp_attribute(_Clang::__lifetimebound__) # define _LIBCPP_LIFETIMEBOUND [[_Clang::__lifetimebound__]] # else @@ -1170,8 +1162,6 @@ typedef __char32_t char32_t; # define _LIBCPP_NOESCAPE # endif -# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] - # if __has_cpp_attribute(_Clang::__no_specializations__) # define _LIBCPP_NO_SPECIALIZATIONS \ [[_Clang::__no_specializations__("Users are not allowed to specialize this standard library entity")]] @@ -1179,43 +1169,70 @@ typedef __char32_t char32_t; # define _LIBCPP_NO_SPECIALIZATIONS # endif -# if __has_attribute(__standalone_debug__) -# define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) +# if __has_cpp_attribute(_Clang::__standalone_debug__) +# define _LIBCPP_STANDALONE_DEBUG [[_Clang::__standalone_debug__]] # else # define _LIBCPP_STANDALONE_DEBUG # endif -# if __has_attribute(__preferred_name__) -# define _LIBCPP_PREFERRED_NAME(x) __attribute__((__preferred_name__(x))) +# if __has_cpp_attribute(_Clang::__preferred_name__) +# define _LIBCPP_PREFERRED_NAME(x) [[_Clang::__preferred_name__(x)]] # else # define _LIBCPP_PREFERRED_NAME(x) # endif -# if __has_attribute(__no_sanitize__) -# define _LIBCPP_NO_SANITIZE(...) __attribute__((__no_sanitize__(__VA_ARGS__))) +# if __has_cpp_attribute(_Clang::__scoped_lockable__) +# define _LIBCPP_SCOPED_LOCKABLE [[_Clang::__scoped_lockable__]] # else -# define _LIBCPP_NO_SANITIZE(...) +# define _LIBCPP_SCOPED_LOCKABLE # endif -# if __has_attribute(__init_priority__) -# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((__init_priority__(100))) +# if __has_cpp_attribute(_Clang::__capability__) +# define _LIBCPP_CAPABILITY(...) [[_Clang::__capability__(__VA_ARGS__)]] # else -# define _LIBCPP_INIT_PRIORITY_MAX +# define _LIBCPP_CAPABILITY(...) # endif -# if __has_attribute(__format__) -// The attribute uses 1-based indices for ordinary and static member functions. -// The attribute uses 2-based indices for non-static member functions. -# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \ - __attribute__((__format__(archetype, format_string_index, first_format_arg_index))) +# if __has_attribute(__acquire_capability__) +# define _LIBCPP_ACQUIRE_CAPABILITY(...) __attribute__((__acquire_capability__(__VA_ARGS__))) # else -# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */ +# define _LIBCPP_ACQUIRE_CAPABILITY(...) # endif -# if __has_attribute(__packed__) -# define _LIBCPP_PACKED __attribute__((__packed__)) +# if __has_cpp_attribute(_Clang::__try_acquire_capability__) +# define _LIBCPP_TRY_ACQUIRE_CAPABILITY(...) [[_Clang::__try_acquire_capability__(__VA_ARGS__)]] # else -# define _LIBCPP_PACKED +# define _LIBCPP_TRY_ACQUIRE_CAPABILITY(...) +# endif + +# if __has_cpp_attribute(_Clang::__acquire_shared_capability__) +# define _LIBCPP_ACQUIRE_SHARED_CAPABILITY [[_Clang::__acquire_shared_capability__]] +# else +# define _LIBCPP_ACQUIRE_SHARED_CAPABILITY +# endif + +# if __has_cpp_attribute(_Clang::__try_acquire_shared_capability__) +# define _LIBCPP_TRY_ACQUIRE_SHARED_CAPABILITY(...) 
[[_Clang::__try_acquire_shared_capability__(__VA_ARGS__)]] +# else +# define _LIBCPP_TRY_ACQUIRE_SHARED_CAPABILITY(...) +# endif + +# if __has_cpp_attribute(_Clang::__release_capability__) +# define _LIBCPP_RELEASE_CAPABILITY [[_Clang::__release_capability__]] +# else +# define _LIBCPP_RELEASE_CAPABILITY +# endif + +# if __has_cpp_attribute(_Clang::__release_shared_capability__) +# define _LIBCPP_RELEASE_SHARED_CAPABILITY [[_Clang::__release_shared_capability__]] +# else +# define _LIBCPP_RELEASE_SHARED_CAPABILITY +# endif + +# if __has_attribute(__requires_capability__) +# define _LIBCPP_REQUIRES_CAPABILITY(...) __attribute__((__requires_capability__(__VA_ARGS__))) +# else +# define _LIBCPP_REQUIRES_CAPABILITY(...) # endif # if defined(_LIBCPP_ABI_MICROSOFT) && __has_declspec_attribute(empty_bases) @@ -1231,6 +1248,13 @@ typedef __char32_t char32_t; # define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK # endif +# if __has_feature(nullability) +# define _LIBCPP_DIAGNOSE_NULLPTR _Nonnull +# else +# define _LIBCPP_DIAGNOSE_NULLPTR +# endif + +// TODO(LLVM 22): Remove this macro once LLVM19 support ends. __cpp_explicit_this_parameter has been set in LLVM20. // Clang-18 has support for deducing this, but it does not set the FTM. # if defined(__cpp_explicit_this_parameter) || (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1800) # define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 1 diff --git a/lib/libcxx/include/__configuration/abi.h b/lib/libcxx/include/__configuration/abi.h index 1806dbc7c1..fb82613ea0 100644 --- a/lib/libcxx/include/__configuration/abi.h +++ b/lib/libcxx/include/__configuration/abi.h @@ -38,92 +38,47 @@ #endif #if _LIBCPP_ABI_VERSION >= 2 -// Change short string representation so that string data starts at offset 0, -// improving its alignment in some cases. -# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT -// Fix deque iterator type in order to support incomplete types. -# define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE -// Fix undefined behavior in how std::list stores its linked nodes. -# define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB -// Fix undefined behavior in how __tree stores its end and parent nodes. -# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB -// Fix undefined behavior in how __hash_table stores its pointer types. -# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB -# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB -# define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE +// TODO: Move the description of the remaining ABI flags to ABIGuarantees.rst or remove them. + // Override the default return value of exception::what() for bad_function_call::what() // with a string that is specific to bad_function_call (see http://wg21.link/LWG2233). // This is an ABI break on platforms that sign and authenticate vtable function pointers // because it changes the mangling of the virtual function located in the vtable, which // changes how it gets signed. # define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE -// Enable optimized version of __do_get_(un)signed which avoids redundant copies. -# define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET -// Give reverse_iterator one data member of type T, not two. -// Also, in C++17 and later, don't derive iterator types from std::iterator. -# define _LIBCPP_ABI_NO_ITERATOR_BASES -// Use the smallest possible integer type to represent the index of the variant. -// Previously libc++ used "unsigned int" exclusively. 
-# define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION -// Unstable attempt to provide a more optimized std::function -# define _LIBCPP_ABI_OPTIMIZED_FUNCTION -// All the regex constants must be distinct and nonzero. -# define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO -// Re-worked external template instantiations for std::string with a focus on -// performance and fast-path inlining. -# define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION -// Enable clang::trivial_abi on std::unique_ptr. -# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI -// Enable clang::trivial_abi on std::shared_ptr and std::weak_ptr -# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI -// std::random_device holds some state when it uses an implementation that gets -// entropy from a file (see _LIBCPP_USING_DEV_RANDOM). When switching from this -// implementation to another one on a platform that has already shipped -// std::random_device, one needs to retain the same object layout to remain ABI -// compatible. This switch removes these workarounds for platforms that don't care -// about ABI compatibility. -# define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT -// Don't export the legacy __basic_string_common class and its methods from the built library. -# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON -// Don't export the legacy __vector_base_common class and its methods from the built library. -# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON // According to the Standard, `bitset::operator[] const` returns bool # define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL -// Fix the implementation of CityHash used for std::hash. -// This is an ABI break because `std::hash` will return a different result, -// which means that hashing the same object in translation units built against -// different versions of libc++ can return inconsistent results. This is especially -// tricky since std::hash is used in the implementation of unordered containers. -// -// The incorrect implementation of CityHash has the problem that it drops some -// bits on the floor. -# define _LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION -// Remove the base 10 implementation of std::to_chars from the dylib. -// The implementation moved to the header, but we still export the symbols from -// the dylib for backwards compatibility. + +// In LLVM 20, we've changed to take these ABI breaks unconditionally. These flags only exist in case someone is running +// into the static_asserts we added to catch the ABI break and don't care that it is one. +// TODO(LLVM 22): Remove these flags +# define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB +# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB +# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB +# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB + +// These flags are documented in ABIGuarantees.rst +# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT +# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON +# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON # define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10 -// Define std::array/std::string_view iterators to be __wrap_iters instead of raw -// pointers, which prevents people from relying on a non-portable implementation -// detail. This is especially useful because enabling bounded iterators hardening -// requires code not to make these assumptions. 
+# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI +# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI +# define _LIBCPP_ABI_FIX_CITYHASH_IMPLEMENTATION +# define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE +# define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE +# define _LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE +# define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING +# define _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE +# define _LIBCPP_ABI_NO_ITERATOR_BASES +# define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT +# define _LIBCPP_ABI_OPTIMIZED_FUNCTION +# define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO +# define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION # define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_ARRAY # define _LIBCPP_ABI_USE_WRAP_ITER_IN_STD_STRING_VIEW -// Dont' add an inline namespace for `std::filesystem` -# define _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE -// std::basic_ios uses WEOF to indicate that the fill value is -// uninitialized. However, on platforms where the size of char_type is -// equal to or greater than the size of int_type and char_type is unsigned, -// std::char_traits::eq_int_type() cannot distinguish between WEOF -// and WCHAR_MAX. This ABI setting determines whether we should instead track whether the fill -// value has been initialized using a separate boolean, which changes the ABI. -# define _LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE -// Historically, libc++ used a type called `__compressed_pair` to reduce storage needs in cases of empty types (e.g. an -// empty allocator in std::vector). We switched to using `[[no_unique_address]]`. However, for ABI compatibility reasons -// we had to add artificial padding in a few places. -// -// This setting disables the addition of such artificial padding, leading to a more optimal -// representation for several types. -# define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING +# define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION + #elif _LIBCPP_ABI_VERSION == 1 # if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF)) // Enable compiling copies of now inline methods into the dylib to support @@ -138,7 +93,7 @@ # endif // Feature macros for disabling pre ABI v1 features. All of these options // are deprecated. -# if defined(__FreeBSD__) && __FreeBSD__ < 14 +# if defined(__FreeBSD__) # define _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR # endif #endif @@ -153,35 +108,6 @@ // The macro below is used for all classes whose ABI have changed as part of fixing these bugs. #define _LIBCPP_ABI_LLVM18_NO_UNIQUE_ADDRESS __attribute__((__abi_tag__("llvm18_nua"))) -// Changes the iterator type of select containers (see below) to a bounded iterator that keeps track of whether it's -// within the bounds of the original container and asserts it on every dereference. -// -// ABI impact: changes the iterator type of the relevant containers. -// -// Supported containers: -// - `span`; -// - `string_view`. -// #define _LIBCPP_ABI_BOUNDED_ITERATORS - -// Changes the iterator type of `basic_string` to a bounded iterator that keeps track of whether it's within the bounds -// of the original container and asserts it on every dereference and when performing iterator arithmetics. -// -// ABI impact: changes the iterator type of `basic_string` and its specializations, such as `string` and `wstring`. 
-// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_STRING - -// Changes the iterator type of `vector` to a bounded iterator that keeps track of whether it's within the bounds of the -// original container and asserts it on every dereference and when performing iterator arithmetics. Note: this doesn't -// yet affect `vector`. -// -// ABI impact: changes the iterator type of `vector` (except `vector`). -// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR - -// Changes the iterator type of `array` to a bounded iterator that keeps track of whether it's within the bounds of the -// container and asserts it on every dereference and when performing iterator arithmetic. -// -// ABI impact: changes the iterator type of `array`, its size and its layout. -// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY - // [[msvc::no_unique_address]] seems to mostly affect empty classes, so the padding scheme for Itanium doesn't work. #if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING) # define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING diff --git a/lib/libcxx/include/__configuration/availability.h b/lib/libcxx/include/__configuration/availability.h index f9e52a690c..ae58e36b50 100644 --- a/lib/libcxx/include/__configuration/availability.h +++ b/lib/libcxx/include/__configuration/availability.h @@ -69,7 +69,13 @@ // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. -#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) +// +// We also allow users to force-disable availability markup via the `_LIBCPP_DISABLE_AVAILABILITY` +// macro because that is the only way to work around a Clang bug related to availability +// attributes: https://github.com/llvm/llvm-project/issues/134151. +// Once that bug has been fixed, we should remove the macro. +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || \ + !defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_DISABLE_AVAILABILITY) # undef _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS # define _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS 0 #endif @@ -78,6 +84,9 @@ // in all versions of the library are available. 
#if !_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS +# define _LIBCPP_INTRODUCED_IN_LLVM_21 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_21_ATTRIBUTE /* nothing */ + # define _LIBCPP_INTRODUCED_IN_LLVM_20 1 # define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE /* nothing */ @@ -107,13 +116,15 @@ # define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH /* nothing */ # define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_4 1 -# define _LIBCPP_INTRODUCED_IN_LLVM_4_ATTRIBUTE /* nothing */ - #elif defined(__APPLE__) // clang-format off +// LLVM 21 +// TODO: Fill this in +# define _LIBCPP_INTRODUCED_IN_LLVM_21 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_21_ATTRIBUTE __attribute__((unavailable)) + // LLVM 20 // TODO: Fill this in # define _LIBCPP_INTRODUCED_IN_LLVM_20 0 @@ -244,14 +255,6 @@ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") -// LLVM 4 -# if defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 50000 -# define _LIBCPP_INTRODUCED_IN_LLVM_4 0 -# else -# define _LIBCPP_INTRODUCED_IN_LLVM_4 1 -# endif -# define _LIBCPP_INTRODUCED_IN_LLVM_4_ATTRIBUTE __attribute__((availability(watchos, strict, introduced = 5.0))) - // clang-format on #else @@ -263,23 +266,6 @@ #endif -// These macros control the availability of std::bad_optional_access and -// other exception types. These were put in the shared library to prevent -// code bloat from every user program defining the vtable for these exception -// types. -// -// Note that when exceptions are disabled, the methods that normally throw -// these exceptions can be used even on older deployment targets, but those -// methods will abort instead of throwing. -#define _LIBCPP_AVAILABILITY_HAS_BAD_OPTIONAL_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4 -#define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4_ATTRIBUTE - -#define _LIBCPP_AVAILABILITY_HAS_BAD_VARIANT_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4 -#define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCPP_INTRODUCED_IN_LLVM_4_ATTRIBUTE - -#define _LIBCPP_AVAILABILITY_HAS_BAD_ANY_CAST _LIBCPP_INTRODUCED_IN_LLVM_4 -#define _LIBCPP_AVAILABILITY_BAD_ANY_CAST _LIBCPP_INTRODUCED_IN_LLVM_4_ATTRIBUTE - // These macros control the availability of all parts of that // depend on something in the dylib. #define _LIBCPP_AVAILABILITY_HAS_FILESYSTEM_LIBRARY _LIBCPP_INTRODUCED_IN_LLVM_9 @@ -359,18 +345,15 @@ #define _LIBCPP_AVAILABILITY_HAS_FROM_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_20 #define _LIBCPP_AVAILABILITY_FROM_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE -// Define availability attributes that depend on _LIBCPP_HAS_EXCEPTIONS. -// Those are defined in terms of the availability attributes above, and -// should not be vendor-specific. -#if !_LIBCPP_HAS_EXCEPTIONS -# define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST -# define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS -#else -# define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_ANY_CAST -# define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS -#endif +// This controls whether `std::__hash_memory` is available in the dylib, which +// is used for some `std::hash` specializations. 
+#define _LIBCPP_AVAILABILITY_HAS_HASH_MEMORY _LIBCPP_INTRODUCED_IN_LLVM_21 +// No attribute, since we've had hash in the headers before + +// This controls whether we provide a message for `bad_function_call::what()` that specific to `std::bad_function_call`. +// See https://wg21.link/LWG2233. This requires `std::bad_function_call::what()` to be available in the dylib. +#define _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE _LIBCPP_INTRODUCED_IN_LLVM_21 +// No attribute, since we've had bad_function_call::what() in the headers before // Define availability attributes that depend on both // _LIBCPP_HAS_EXCEPTIONS and _LIBCPP_HAS_RTTI. diff --git a/lib/libcxx/include/__configuration/compiler.h b/lib/libcxx/include/__configuration/compiler.h index caedfa9fd8..54025c5b22 100644 --- a/lib/libcxx/include/__configuration/compiler.h +++ b/lib/libcxx/include/__configuration/compiler.h @@ -33,8 +33,8 @@ // Warn if a compiler version is used that is not supported anymore // LLVM RELEASE Update the minimum compiler versions # if defined(_LIBCPP_CLANG_VER) -# if _LIBCPP_CLANG_VER < 1800 -# warning "Libc++ only supports Clang 18 and later" +# if _LIBCPP_CLANG_VER < 1900 +# warning "Libc++ only supports Clang 19 and later" # endif # elif defined(_LIBCPP_APPLE_CLANG_VER) # if _LIBCPP_APPLE_CLANG_VER < 1500 diff --git a/lib/libcxx/include/__configuration/platform.h b/lib/libcxx/include/__configuration/platform.h index b03d7c0f39..1a83b0dc27 100644 --- a/lib/libcxx/include/__configuration/platform.h +++ b/lib/libcxx/include/__configuration/platform.h @@ -42,6 +42,13 @@ # endif #endif +// This is required in order for _NEWLIB_VERSION to be defined in places where we use it. +// TODO: We shouldn't be including arbitrarily-named headers from libc++ since this can break valid +// user code. Move code paths that need _NEWLIB_VERSION to another customization mechanism. 
+#if __has_include() +# include +#endif + #ifndef __BYTE_ORDER__ # error \ "Your compiler doesn't seem to define __BYTE_ORDER__, which is required by libc++ to know the endianness of your target platform" diff --git a/lib/libcxx/include/__coroutine/coroutine_handle.h b/lib/libcxx/include/__coroutine/coroutine_handle.h index e2cde20498..b7add25851 100644 --- a/lib/libcxx/include/__coroutine/coroutine_handle.h +++ b/lib/libcxx/include/__coroutine/coroutine_handle.h @@ -28,10 +28,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD // [coroutine.handle] template -struct _LIBCPP_TEMPLATE_VIS coroutine_handle; +struct coroutine_handle; template <> -struct _LIBCPP_TEMPLATE_VIS coroutine_handle { +struct coroutine_handle { public: // [coroutine.handle.con], construct/reset constexpr coroutine_handle() noexcept = default; @@ -93,7 +93,7 @@ operator<=>(coroutine_handle<> __x, coroutine_handle<> __y) noexcept { } template -struct _LIBCPP_TEMPLATE_VIS coroutine_handle { +struct coroutine_handle { public: // [coroutine.handle.con], construct/reset constexpr coroutine_handle() noexcept = default; @@ -172,6 +172,6 @@ struct hash> { _LIBCPP_END_NAMESPACE_STD -#endif // __LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 #endif // _LIBCPP___COROUTINE_COROUTINE_HANDLE_H diff --git a/lib/libcxx/include/__coroutine/coroutine_traits.h b/lib/libcxx/include/__coroutine/coroutine_traits.h index 78f05341f7..60f7ca2311 100644 --- a/lib/libcxx/include/__coroutine/coroutine_traits.h +++ b/lib/libcxx/include/__coroutine/coroutine_traits.h @@ -43,6 +43,6 @@ struct coroutine_traits : public __coroutine_traits_sfinae<_Ret> {}; _LIBCPP_END_NAMESPACE_STD -#endif // __LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 #endif // _LIBCPP___COROUTINE_COROUTINE_TRAITS_H diff --git a/lib/libcxx/include/__coroutine/noop_coroutine_handle.h b/lib/libcxx/include/__coroutine/noop_coroutine_handle.h index da13d57960..2b2838b6bf 100644 --- a/lib/libcxx/include/__coroutine/noop_coroutine_handle.h +++ b/lib/libcxx/include/__coroutine/noop_coroutine_handle.h @@ -28,7 +28,7 @@ struct noop_coroutine_promise {}; // [coroutine.handle.noop] template <> -struct _LIBCPP_TEMPLATE_VIS coroutine_handle { +struct coroutine_handle { public: // [coroutine.handle.noop.conv], conversion _LIBCPP_HIDE_FROM_ABI constexpr operator coroutine_handle<>() const noexcept { @@ -94,6 +94,6 @@ inline _LIBCPP_HIDE_FROM_ABI noop_coroutine_handle noop_coroutine() noexcept { r _LIBCPP_END_NAMESPACE_STD -#endif // __LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 #endif // _LIBCPP___COROUTINE_NOOP_COROUTINE_HANDLE_H diff --git a/lib/libcxx/include/__coroutine/trivial_awaitables.h b/lib/libcxx/include/__coroutine/trivial_awaitables.h index b604bd3c2d..7cb7f4dfeb 100644 --- a/lib/libcxx/include/__coroutine/trivial_awaitables.h +++ b/lib/libcxx/include/__coroutine/trivial_awaitables.h @@ -35,6 +35,6 @@ struct suspend_always { _LIBCPP_END_NAMESPACE_STD -#endif // __LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 #endif // __LIBCPP___COROUTINE_TRIVIAL_AWAITABLES_H diff --git a/lib/libcxx/include/__cstddef/byte.h b/lib/libcxx/include/__cstddef/byte.h index 09e1d75e0b..3d97db1bea 100644 --- a/lib/libcxx/include/__cstddef/byte.h +++ b/lib/libcxx/include/__cstddef/byte.h @@ -19,7 +19,7 @@ #endif #if _LIBCPP_STD_VER >= 17 -namespace std { // purposefully not versioned +_LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD enum class byte : unsigned char {}; @@ -79,7 +79,7 @@ template ::value, int> = 0> return static_cast<_Integer>(__b); } -} // namespace std 
+_LIBCPP_END_UNVERSIONED_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 17 #endif // _LIBCPP___CSTDDEF_BYTE_H diff --git a/lib/libcxx/include/__debug_utils/sanitizers.h b/lib/libcxx/include/__debug_utils/sanitizers.h index 73d192711e..058feab026 100644 --- a/lib/libcxx/include/__debug_utils/sanitizers.h +++ b/lib/libcxx/include/__debug_utils/sanitizers.h @@ -17,7 +17,7 @@ # pragma GCC system_header #endif -#if _LIBCPP_HAS_ASAN +#if __has_feature(address_sanitizer) extern "C" { _LIBCPP_EXPORTED_FROM_ABI void @@ -28,12 +28,12 @@ _LIBCPP_EXPORTED_FROM_ABI int __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, const void*, const void*); } -#endif // _LIBCPP_HAS_ASAN +#endif // __has_feature(address_sanitizer) _LIBCPP_BEGIN_NAMESPACE_STD // ASan choices -#if _LIBCPP_HAS_ASAN +#if __has_feature(address_sanitizer) # define _LIBCPP_HAS_ASAN_CONTAINER_ANNOTATIONS_FOR_ALL_ALLOCATORS 1 #endif @@ -57,7 +57,7 @@ _LIBCPP_HIDE_FROM_ABI void __annotate_double_ended_contiguous_container( const void* __last_old_contained, const void* __first_new_contained, const void* __last_new_contained) { -#if !_LIBCPP_HAS_ASAN +#if !__has_feature(address_sanitizer) (void)__first_storage; (void)__last_storage; (void)__first_old_contained; @@ -86,7 +86,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __annotate_contiguous_c const void* __last_storage, const void* __old_last_contained, const void* __new_last_contained) { -#if !_LIBCPP_HAS_ASAN +#if !__has_feature(address_sanitizer) (void)__first_storage; (void)__last_storage; (void)__old_last_contained; diff --git a/lib/libcxx/include/__exception/exception.h b/lib/libcxx/include/__exception/exception.h index e724e1b99b..f7dab6e83a 100644 --- a/lib/libcxx/include/__exception/exception.h +++ b/lib/libcxx/include/__exception/exception.h @@ -21,7 +21,7 @@ # pragma GCC system_header #endif -namespace std { // purposefully not using versioning namespace +_LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD #if defined(_LIBCPP_ABI_VCRUNTIME) && (!defined(_HAS_EXCEPTIONS) || _HAS_EXCEPTIONS != 0) // The std::exception class was already included above, but we're explicit about this condition here for clarity. @@ -89,6 +89,6 @@ public: }; #endif // !_LIBCPP_ABI_VCRUNTIME -} // namespace std +_LIBCPP_END_UNVERSIONED_NAMESPACE_STD #endif // _LIBCPP___EXCEPTION_EXCEPTION_H diff --git a/lib/libcxx/include/__exception/exception_ptr.h b/lib/libcxx/include/__exception/exception_ptr.h index 6257e6f729..796fa924be 100644 --- a/lib/libcxx/include/__exception/exception_ptr.h +++ b/lib/libcxx/include/__exception/exception_ptr.h @@ -15,6 +15,7 @@ #include <__memory/addressof.h> #include <__memory/construct_at.h> #include <__type_traits/decay.h> +#include <__type_traits/is_pointer.h> #include #include @@ -52,7 +53,7 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS __cxa_exception* __cxa_init_primary_exception( #endif -namespace std { // purposefully not using versioning namespace +_LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD #ifndef _LIBCPP_ABI_MICROSOFT @@ -62,11 +63,13 @@ class _LIBCPP_EXPORTED_FROM_ABI exception_ptr { static exception_ptr __from_native_exception_pointer(void*) _NOEXCEPT; template - friend _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep) _NOEXCEPT; + friend _LIBCPP_HIDE_FROM_ABI exception_ptr __make_exception_ptr_explicit(_Ep&) _NOEXCEPT; public: - // exception_ptr is basically a COW string. + // exception_ptr is basically a COW string so it is trivially relocatable. + // It is also replaceable because assignment has normal value semantics. 
using __trivially_relocatable _LIBCPP_NODEBUG = exception_ptr; + using __replaceable _LIBCPP_NODEBUG = exception_ptr; _LIBCPP_HIDE_FROM_ABI exception_ptr() _NOEXCEPT : __ptr_() {} _LIBCPP_HIDE_FROM_ABI exception_ptr(nullptr_t) _NOEXCEPT : __ptr_() {} @@ -89,25 +92,21 @@ public: friend _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr); }; -template -_LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT { # if _LIBCPP_HAS_EXCEPTIONS -# if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION && __cplusplus >= 201103L +# if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION +template +_LIBCPP_HIDE_FROM_ABI exception_ptr __make_exception_ptr_explicit(_Ep& __e) _NOEXCEPT { using _Ep2 = __decay_t<_Ep>; - void* __ex = __cxxabiv1::__cxa_allocate_exception(sizeof(_Ep)); # ifdef __wasm__ - // In Wasm, a destructor returns its argument - (void)__cxxabiv1::__cxa_init_primary_exception( - __ex, const_cast(&typeid(_Ep)), [](void* __p) -> void* { + auto __cleanup = [](void* __p) -> void* { + std::__destroy_at(static_cast<_Ep2*>(__p)); + return __p; + }; # else - (void)__cxxabiv1::__cxa_init_primary_exception(__ex, const_cast(&typeid(_Ep)), [](void* __p) { + auto __cleanup = [](void* __p) { std::__destroy_at(static_cast<_Ep2*>(__p)); }; # endif - std::__destroy_at(static_cast<_Ep2*>(__p)); -# ifdef __wasm__ - return __p; -# endif - }); + (void)__cxxabiv1::__cxa_init_primary_exception(__ex, const_cast(&typeid(_Ep)), __cleanup); try { ::new (__ex) _Ep2(__e); @@ -116,19 +115,48 @@ _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT { __cxxabiv1::__cxa_free_exception(__ex); return current_exception(); } -# else +} +# endif + +template +_LIBCPP_HIDE_FROM_ABI exception_ptr __make_exception_ptr_via_throw(_Ep& __e) _NOEXCEPT { try { throw __e; } catch (...) { return current_exception(); } -# endif -# else - ((void)__e); - std::abort(); -# endif } +template +_LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT { + // Objective-C exceptions are thrown via pointer. When throwing an Objective-C exception, + // Clang generates a call to `objc_exception_throw` instead of the usual `__cxa_throw`. + // That function creates an exception with a special Objective-C typeinfo instead of + // the usual C++ typeinfo, since that is needed to implement the behavior documented + // at [1]). + // + // Because of this special behavior, we can't create an exception via `__cxa_init_primary_exception` + // for Objective-C exceptions, otherwise we'd bypass `objc_exception_throw`. See https://llvm.org/PR135089. 
+ // + // [1]: + // https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/Exceptions/Articles/Exceptions64Bit.html + if _LIBCPP_CONSTEXPR (is_pointer<_Ep>::value) { + return std::__make_exception_ptr_via_throw(__e); + } + +# if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION && !defined(_LIBCPP_CXX03_LANG) + return std::__make_exception_ptr_explicit(__e); +# else + return std::__make_exception_ptr_via_throw(__e); +# endif +} +# else // !_LIBCPP_HAS_EXCEPTIONS +template +_LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep) _NOEXCEPT { + std::abort(); +} +# endif // _LIBCPP_HAS_EXCEPTIONS + #else // _LIBCPP_ABI_MICROSOFT class _LIBCPP_EXPORTED_FROM_ABI exception_ptr { @@ -171,6 +199,6 @@ _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT { } #endif // _LIBCPP_ABI_MICROSOFT -} // namespace std +_LIBCPP_END_UNVERSIONED_NAMESPACE_STD #endif // _LIBCPP___EXCEPTION_EXCEPTION_PTR_H diff --git a/lib/libcxx/include/__exception/nested_exception.h b/lib/libcxx/include/__exception/nested_exception.h index d560b6bbc3..90b14158d5 100644 --- a/lib/libcxx/include/__exception/nested_exception.h +++ b/lib/libcxx/include/__exception/nested_exception.h @@ -27,7 +27,7 @@ # pragma GCC system_header #endif -namespace std { // purposefully not using versioning namespace +_LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD class _LIBCPP_EXPORTED_FROM_ABI nested_exception { exception_ptr __ptr_; @@ -95,6 +95,6 @@ inline _LIBCPP_HIDE_FROM_ABI void rethrow_if_nested(const _Ep& __e) { template ::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI void rethrow_if_nested(const _Ep&) {} -} // namespace std +_LIBCPP_END_UNVERSIONED_NAMESPACE_STD #endif // _LIBCPP___EXCEPTION_NESTED_EXCEPTION_H diff --git a/lib/libcxx/include/__exception/operations.h b/lib/libcxx/include/__exception/operations.h index 15520c558a..29d5c698a9 100644 --- a/lib/libcxx/include/__exception/operations.h +++ b/lib/libcxx/include/__exception/operations.h @@ -15,7 +15,7 @@ # pragma GCC system_header #endif -namespace std { // purposefully not using versioning namespace +_LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS) || \ defined(_LIBCPP_BUILDING_LIBRARY) using unexpected_handler = void (*)(); @@ -37,6 +37,6 @@ class _LIBCPP_EXPORTED_FROM_ABI exception_ptr; _LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT; [[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr); -} // namespace std +_LIBCPP_END_UNVERSIONED_NAMESPACE_STD #endif // _LIBCPP___EXCEPTION_OPERATIONS_H diff --git a/lib/libcxx/include/__exception/terminate.h b/lib/libcxx/include/__exception/terminate.h index 0bfc3506d3..955a49c2b0 100644 --- a/lib/libcxx/include/__exception/terminate.h +++ b/lib/libcxx/include/__exception/terminate.h @@ -15,8 +15,8 @@ # pragma GCC system_header #endif -namespace std { // purposefully not using versioning namespace +_LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD [[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void terminate() _NOEXCEPT; -} // namespace std +_LIBCPP_END_UNVERSIONED_NAMESPACE_STD #endif // _LIBCPP___EXCEPTION_TERMINATE_H diff --git a/lib/libcxx/include/__expected/expected.h b/lib/libcxx/include/__expected/expected.h index 03bbd1623e..0f446b8707 100644 --- a/lib/libcxx/include/__expected/expected.h +++ b/lib/libcxx/include/__expected/expected.h @@ -25,10 +25,12 @@ #include <__type_traits/is_assignable.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> +#include 
<__type_traits/is_core_convertible.h> #include <__type_traits/is_function.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_reference.h> +#include <__type_traits/is_replaceable.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/is_trivially_constructible.h> @@ -470,6 +472,8 @@ public: __conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value && __libcpp_is_trivially_relocatable<_Err>::value, expected, void>; + using __replaceable _LIBCPP_NODEBUG = + __conditional_t<__is_replaceable_v<_Tp> && __is_replaceable_v<_Err>, expected, void>; template using rebind = expected<_Up, error_type>; @@ -1139,8 +1143,15 @@ public: // [expected.object.eq], equality operators template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y) requires(!is_void_v<_T2>) - _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y) { +# if _LIBCPP_STD_VER >= 26 + && requires { + { *__x == *__y } -> __core_convertible_to; + { __x.error() == __y.error() } -> __core_convertible_to; + } +# endif + { if (__x.__has_val() != __y.__has_val()) { return false; } else { @@ -1153,12 +1164,24 @@ public: } template - _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const _T2& __v) { + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const _T2& __v) +# if _LIBCPP_STD_VER >= 26 + requires(!__is_std_expected<_T2>::value) && requires { + { *__x == __v } -> __core_convertible_to; + } +# endif + { return __x.__has_val() && static_cast(__x.__val() == __v); } template - _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __e) { + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __e) +# if _LIBCPP_STD_VER >= 26 + requires requires { + { __x.error() == __e.error() } -> __core_convertible_to; + } +# endif + { return !__x.__has_val() && static_cast(__x.__unex() == __e.error()); } }; @@ -1851,7 +1874,13 @@ public: // [expected.void.eq], equality operators template requires is_void_v<_T2> - _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y) { + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y) +# if _LIBCPP_STD_VER >= 26 + requires requires { + { __x.error() == __y.error() } -> __core_convertible_to; + } +# endif + { if (__x.__has_val() != __y.__has_val()) { return false; } else { @@ -1860,7 +1889,13 @@ public: } template - _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __y) { + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __y) +# if _LIBCPP_STD_VER >= 26 + requires requires { + { __x.error() == __y.error() } -> __core_convertible_to; + } +# endif + { return !__x.__has_val() && static_cast(__x.__unex() == __y.error()); } }; diff --git a/lib/libcxx/include/__filesystem/directory_entry.h b/lib/libcxx/include/__filesystem/directory_entry.h index 11e07acdbe..5f236cf264 100644 --- a/lib/libcxx/include/__filesystem/directory_entry.h +++ b/lib/libcxx/include/__filesystem/directory_entry.h @@ -286,7 +286,7 @@ private: return; } if (__ec && (!__allow_dne || !__is_dne_error(__ec))) - __throw_filesystem_error(__msg, __p_, __ec); + 
filesystem::__throw_filesystem_error(__msg, __p_, __ec); } _LIBCPP_HIDE_FROM_ABI void __refresh(error_code* __ec = nullptr) { diff --git a/lib/libcxx/include/__filesystem/operations.h b/lib/libcxx/include/__filesystem/operations.h index 904023d2fb..29b6c2f798 100644 --- a/lib/libcxx/include/__filesystem/operations.h +++ b/lib/libcxx/include/__filesystem/operations.h @@ -66,6 +66,9 @@ _LIBCPP_EXPORTED_FROM_ABI bool __remove(const path&, error_code* __ec = nullptr) _LIBCPP_EXPORTED_FROM_ABI void __rename(const path& __from, const path& __to, error_code* __ec = nullptr); _LIBCPP_EXPORTED_FROM_ABI void __resize_file(const path&, uintmax_t __size, error_code* = nullptr); _LIBCPP_EXPORTED_FROM_ABI path __temp_directory_path(error_code* __ec = nullptr); +_LIBCPP_EXPORTED_FROM_ABI bool __fs_is_empty(const path& __p, error_code* __ec = nullptr); +_LIBCPP_EXPORTED_FROM_ABI void __permissions(const path&, perms, perm_options, error_code* = nullptr); +_LIBCPP_EXPORTED_FROM_ABI space_info __space(const path&, error_code* __ec = nullptr); inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p) { return __absolute(__p); } inline _LIBCPP_HIDE_FROM_ABI path absolute(const path& __p, error_code& __ec) { return __absolute(__p, &__ec); } @@ -182,7 +185,6 @@ inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p) { return is_dire inline _LIBCPP_HIDE_FROM_ABI bool is_directory(const path& __p, error_code& __ec) noexcept { return is_directory(__status(__p, &__ec)); } -_LIBCPP_EXPORTED_FROM_ABI bool __fs_is_empty(const path& __p, error_code* __ec = nullptr); inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p) { return __fs_is_empty(__p); } inline _LIBCPP_HIDE_FROM_ABI bool is_empty(const path& __p, error_code& __ec) { return __fs_is_empty(__p, &__ec); } inline _LIBCPP_HIDE_FROM_ABI bool is_fifo(file_status __s) noexcept { return __s.type() == file_type::fifo; } @@ -220,7 +222,6 @@ inline _LIBCPP_HIDE_FROM_ABI void last_write_time(const path& __p, file_time_typ inline _LIBCPP_HIDE_FROM_ABI void last_write_time(const path& __p, file_time_type __t, error_code& __ec) noexcept { __last_write_time(__p, __t, &__ec); } -_LIBCPP_EXPORTED_FROM_ABI void __permissions(const path&, perms, perm_options, error_code* = nullptr); inline _LIBCPP_HIDE_FROM_ABI void permissions(const path& __p, perms __prms, perm_options __opts = perm_options::replace) { __permissions(__p, __prms, __opts); @@ -281,7 +282,6 @@ inline _LIBCPP_HIDE_FROM_ABI void resize_file(const path& __p, uintmax_t __ns) { inline _LIBCPP_HIDE_FROM_ABI void resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept { return __resize_file(__p, __ns, &__ec); } -_LIBCPP_EXPORTED_FROM_ABI space_info __space(const path&, error_code* __ec = nullptr); inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p) { return __space(__p); } inline _LIBCPP_HIDE_FROM_ABI space_info space(const path& __p, error_code& __ec) noexcept { return __space(__p, &__ec); diff --git a/lib/libcxx/include/__filesystem/path.h b/lib/libcxx/include/__filesystem/path.h index 0a751ba329..381e5678a5 100644 --- a/lib/libcxx/include/__filesystem/path.h +++ b/lib/libcxx/include/__filesystem/path.h @@ -17,7 +17,9 @@ #include <__fwd/functional.h> #include <__iterator/back_insert_iterator.h> #include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> #include <__type_traits/decay.h> +#include <__type_traits/enable_if.h> #include <__type_traits/is_pointer.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_pointer.h> @@ -27,7 +29,6 @@ #if 
_LIBCPP_HAS_LOCALIZATION # include // for quoted -# include #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -583,7 +584,7 @@ public: template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI path& operator+=(_ECharT __x) { - _PathCVT<_ECharT>::__append_source(__pn_, basic_string_view<_ECharT>(&__x, 1)); + _PathCVT<_ECharT>::__append_source(__pn_, basic_string_view<_ECharT>(std::addressof(__x), 1)); return *this; } diff --git a/lib/libcxx/include/__filesystem/u8path.h b/lib/libcxx/include/__filesystem/u8path.h index e13980298d..a701425e42 100644 --- a/lib/libcxx/include/__filesystem/u8path.h +++ b/lib/libcxx/include/__filesystem/u8path.h @@ -13,14 +13,9 @@ #include <__algorithm/unwrap_iter.h> #include <__config> #include <__filesystem/path.h> +#include <__locale> #include -// Only required on Windows for __widen_from_utf8, and included conservatively -// because it requires support for localization. -#if defined(_LIBCPP_WIN32API) -# include -#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/lib/libcxx/include/__flat_map/flat_map.h b/lib/libcxx/include/__flat_map/flat_map.h index 9cc39c0a1e..bf193f6d3c 100644 --- a/lib/libcxx/include/__flat_map/flat_map.h +++ b/lib/libcxx/include/__flat_map/flat_map.h @@ -11,16 +11,15 @@ #define _LIBCPP___FLAT_MAP_FLAT_MAP_H #include <__algorithm/lexicographical_compare_three_way.h> +#include <__algorithm/lower_bound.h> #include <__algorithm/min.h> #include <__algorithm/ranges_adjacent_find.h> #include <__algorithm/ranges_equal.h> #include <__algorithm/ranges_inplace_merge.h> -#include <__algorithm/ranges_lower_bound.h> -#include <__algorithm/ranges_partition_point.h> #include <__algorithm/ranges_sort.h> #include <__algorithm/ranges_unique.h> -#include <__algorithm/ranges_upper_bound.h> #include <__algorithm/remove_if.h> +#include <__algorithm/upper_bound.h> #include <__assert> #include <__compare/synth_three_way.h> #include <__concepts/swappable.h> @@ -33,6 +32,7 @@ #include <__functional/invoke.h> #include <__functional/is_transparent.h> #include <__functional/operations.h> +#include <__fwd/memory.h> #include <__fwd/vector.h> #include <__iterator/concepts.h> #include <__iterator/distance.h> @@ -114,11 +114,12 @@ public: class value_compare { private: _LIBCPP_NO_UNIQUE_ADDRESS key_compare __comp_; - _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare(key_compare __c) : __comp_(__c) {} friend flat_map; public: - _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator()(const_reference __x, const_reference __y) const { return __comp_(__x.first, __y.first); } }; @@ -137,14 +138,14 @@ private: public: // [flat.map.cons], construct/copy/destroy - _LIBCPP_HIDE_FROM_ABI flat_map() noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map() noexcept( is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_MappedContainer> && is_nothrow_default_constructible_v<_Compare>) : __containers_(), __compare_() {} _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map&) = default; - _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other) noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(flat_map&& __other) noexcept( is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> && is_nothrow_move_constructible_v<_Compare>) # if 
_LIBCPP_HAS_EXCEPTIONS @@ -165,7 +166,7 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map& __other, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(const flat_map& __other, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_tag{}, __alloc, __other.__containers_.keys, @@ -174,7 +175,7 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(flat_map&& __other, const _Allocator& __alloc) # if _LIBCPP_HAS_EXCEPTIONS try # endif // _LIBCPP_HAS_EXCEPTIONS @@ -191,7 +192,7 @@ public: # endif // _LIBCPP_HAS_EXCEPTIONS } - _LIBCPP_HIDE_FROM_ABI flat_map( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map( key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare()) : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), @@ -201,7 +202,7 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), @@ -211,7 +212,7 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const key_compare& __comp, @@ -222,7 +223,7 @@ public: __sort_and_unique(); } - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(sorted_unique_t, key_container_type __key_cont, mapped_container_type __mapped_cont, @@ -236,7 +237,7 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(sorted_unique_t, const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, @@ -250,12 +251,12 @@ public: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI - flat_map(sorted_unique_t, - const key_container_type& __key_cont, - const mapped_container_type& __mapped_cont, - const key_compare& __comp, - const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map( + sorted_unique_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), "flat_map keys and mapped containers have different size"); @@ -263,21 +264,22 @@ public: __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates"); } - _LIBCPP_HIDE_FROM_ABI explicit flat_map(const key_compare& __comp) : __containers_(), __compare_(__comp) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_map(const key_compare& __comp) + : __containers_(), 
__compare_(__comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(const key_compare& __comp, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(const key_compare& __comp, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI explicit flat_map(const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_map(const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) {} template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) : __containers_(), __compare_(__comp) { insert(__first, __last); @@ -285,7 +287,7 @@ public: template requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { insert(__first, __last); @@ -293,99 +295,105 @@ public: template requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI flat_map(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_map(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { insert(__first, __last); } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t __fr, _Range&& __rg) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(from_range_t __fr, _Range&& __rg) : flat_map(__fr, std::forward<_Range>(__rg), key_compare()) {} template <_ContainerCompatibleRange _Range, class _Allocator> requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(from_range_t, _Range&& __rg, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { insert_range(std::forward<_Range>(__rg)); } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_map(__comp) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(from_range_t, _Range&& __rg, const key_compare& __comp) + : flat_map(__comp) { insert_range(std::forward<_Range>(__rg)); } template <_ContainerCompatibleRange _Range, class _Allocator> requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_map(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { insert_range(std::forward<_Range>(__rg)); } template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(sorted_unique_t, _InputIterator 
__first, _InputIterator __last, const key_compare& __comp = key_compare()) : __containers_(), __compare_(__comp) { insert(sorted_unique, __first, __last); } template requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI - flat_map(sorted_unique_t, - _InputIterator __first, - _InputIterator __last, - const key_compare& __comp, - const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map( + sorted_unique_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { insert(sorted_unique, __first, __last); } template requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(sorted_unique_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { insert(sorted_unique, __first, __last); } - _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list __il, const key_compare& __comp = key_compare()) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_map(initializer_list __il, const key_compare& __comp = key_compare()) : flat_map(__il.begin(), __il.end(), __comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) : flat_map(__il.begin(), __il.end(), __comp, __alloc) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list __il, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_map(initializer_list __il, const _Allocator& __alloc) : flat_map(__il.begin(), __il.end(), __alloc) {} - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(sorted_unique_t, initializer_list __il, const key_compare& __comp = key_compare()) : flat_map(sorted_unique, __il.begin(), __il.end(), __comp) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map(sorted_unique_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) : flat_map(sorted_unique, __il.begin(), __il.end(), __comp, __alloc) {} template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(sorted_unique_t, initializer_list __il, const _Allocator& __alloc) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_map(sorted_unique_t, initializer_list __il, const _Allocator& __alloc) : flat_map(sorted_unique, __il.begin(), __il.end(), __alloc) {} - _LIBCPP_HIDE_FROM_ABI flat_map& operator=(initializer_list __il) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map& operator=(initializer_list __il) { clear(); insert(__il); return *this; } - _LIBCPP_HIDE_FROM_ABI flat_map& operator=(const flat_map&) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map& operator=(const flat_map&) = default; - _LIBCPP_HIDE_FROM_ABI flat_map& operator=(flat_map&& __other) noexcept( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map& operator=(flat_map&& __other) noexcept( is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> && 
is_nothrow_move_assignable_v<_Compare>) { // No matter what happens, we always want to clear the other container before returning @@ -402,49 +410,65 @@ public: } // iterators - _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { return iterator(__containers_.keys.begin(), __containers_.values.begin()); } - _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { return const_iterator(__containers_.keys.begin(), __containers_.values.begin()); } - _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { return iterator(__containers_.keys.end(), __containers_.values.end()); } - _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept { return const_iterator(__containers_.keys.end(), __containers_.values.end()); } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { + return reverse_iterator(end()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { + return reverse_iterator(begin()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } - _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); } - _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { return begin(); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { return end(); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(end()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(begin()); + } // [flat.map.capacity], capacity - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool empty() const noexcept { + return __containers_.keys.empty(); + } - _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { + return __containers_.keys.size(); + } - _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 
size_type max_size() const noexcept { return std::min(__containers_.keys.max_size(), __containers_.values.max_size()); } // [flat.map.access], element access - _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __x) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](const key_type& __x) requires is_constructible_v { return try_emplace(__x).first->second; } - _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __x) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](key_type&& __x) requires is_constructible_v { return try_emplace(std::move(__x)).first->second; @@ -453,11 +477,11 @@ public: template requires(__is_compare_transparent && is_constructible_v && is_constructible_v && !is_convertible_v<_Kp &&, const_iterator> && !is_convertible_v<_Kp &&, iterator>) - _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](_Kp&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& operator[](_Kp&& __x) { return try_emplace(std::forward<_Kp>(__x)).first->second; } - _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const key_type& __x) { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const key_type&): Key does not exist"); @@ -465,7 +489,7 @@ public: return __it->second; } - _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const key_type& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const key_type& __x) const { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const key_type&) const: Key does not exist"); @@ -475,7 +499,7 @@ public: template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI mapped_type& at(const _Kp& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(const _Kp& __x) { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const K&): Key does not exist"); @@ -485,7 +509,7 @@ public: template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const _Kp& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_type& at(const _Kp& __x) const { auto __it = find(__x); if (__it == end()) { std::__throw_out_of_range("flat_map::at(const K&) const: Key does not exist"); @@ -496,45 +520,49 @@ public: // [flat.map.modifiers], modifiers template requires is_constructible_v, _Args...> - _LIBCPP_HIDE_FROM_ABI pair emplace(_Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair emplace(_Args&&... __args) { std::pair __pair(std::forward<_Args>(__args)...); return __try_emplace(std::move(__pair.first), std::move(__pair.second)); } template requires is_constructible_v, _Args...> - _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace_hint(const_iterator __hint, _Args&&... 
__args) { std::pair __pair(std::forward<_Args>(__args)...); return __try_emplace_hint(__hint, std::move(__pair.first), std::move(__pair.second)).first; } - _LIBCPP_HIDE_FROM_ABI pair insert(const value_type& __x) { return emplace(__x); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair insert(const value_type& __x) { + return emplace(__x); + } - _LIBCPP_HIDE_FROM_ABI pair insert(value_type&& __x) { return emplace(std::move(__x)); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair insert(value_type&& __x) { + return emplace(std::move(__x)); + } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, const value_type& __x) { return emplace_hint(__hint, __x); } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, value_type&& __x) { return emplace_hint(__hint, std::move(__x)); } template requires is_constructible_v, _PairLike> - _LIBCPP_HIDE_FROM_ABI pair insert(_PairLike&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair insert(_PairLike&& __x) { return emplace(std::forward<_PairLike>(__x)); } template requires is_constructible_v, _PairLike> - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _PairLike&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, _PairLike&& __x) { return emplace_hint(__hint, std::forward<_PairLike>(__x)); } template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(_InputIterator __first, _InputIterator __last) { if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { __reserve(__last - __first); } @@ -543,7 +571,8 @@ public: template requires __has_input_iterator_category<_InputIterator>::value - _LIBCPP_HIDE_FROM_ABI void insert(sorted_unique_t, _InputIterator __first, _InputIterator __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + insert(sorted_unique_t, _InputIterator __first, _InputIterator __last) { if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { __reserve(__last - __first); } @@ -552,7 +581,7 @@ public: } template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(_Range&& __range) { if constexpr (ranges::sized_range<_Range>) { __reserve(ranges::size(__range)); } @@ -560,19 +589,22 @@ public: __append_sort_merge_unique(ranges::begin(__range), ranges::end(__range)); } - _LIBCPP_HIDE_FROM_ABI void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list __il) { + insert(__il.begin(), __il.end()); + } - _LIBCPP_HIDE_FROM_ABI void insert(sorted_unique_t, initializer_list __il) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(sorted_unique_t, initializer_list __il) { insert(sorted_unique, __il.begin(), __il.end()); } - _LIBCPP_HIDE_FROM_ABI containers extract() && { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 containers extract() && { auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); auto __ret = std::move(__containers_); return __ret; } - 
_LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) { _LIBCPP_ASSERT_VALID_INPUT_RANGE( __key_cont.size() == __mapped_cont.size(), "flat_map keys and mapped containers have different size"); @@ -586,13 +618,15 @@ public: template requires is_constructible_v - _LIBCPP_HIDE_FROM_ABI pair try_emplace(const key_type& __key, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + try_emplace(const key_type& __key, _Args&&... __args) { return __try_emplace(__key, std::forward<_Args>(__args)...); } template requires is_constructible_v - _LIBCPP_HIDE_FROM_ABI pair try_emplace(key_type&& __key, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + try_emplace(key_type&& __key, _Args&&... __args) { return __try_emplace(std::move(__key), std::forward<_Args>(__args)...); } @@ -600,75 +634,84 @@ public: requires(__is_compare_transparent && is_constructible_v && is_constructible_v && !is_convertible_v<_Kp &&, const_iterator> && !is_convertible_v<_Kp &&, iterator>) - _LIBCPP_HIDE_FROM_ABI pair try_emplace(_Kp&& __key, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair try_emplace(_Kp&& __key, _Args&&... __args) { return __try_emplace(std::forward<_Kp>(__key), std::forward<_Args>(__args)...); } template requires is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, const key_type& __key, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + try_emplace(const_iterator __hint, const key_type& __key, _Args&&... __args) { return __try_emplace_hint(__hint, __key, std::forward<_Args>(__args)...).first; } template requires is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, key_type&& __key, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + try_emplace(const_iterator __hint, key_type&& __key, _Args&&... __args) { return __try_emplace_hint(__hint, std::move(__key), std::forward<_Args>(__args)...).first; } template requires __is_compare_transparent && is_constructible_v && is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, _Kp&& __key, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + try_emplace(const_iterator __hint, _Kp&& __key, _Args&&... 
__args) { return __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Args>(__args)...).first; } template requires is_assignable_v && is_constructible_v - _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(const key_type& __key, _Mapped&& __obj) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + insert_or_assign(const key_type& __key, _Mapped&& __obj) { return __insert_or_assign(__key, std::forward<_Mapped>(__obj)); } template requires is_assignable_v && is_constructible_v - _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(key_type&& __key, _Mapped&& __obj) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + insert_or_assign(key_type&& __key, _Mapped&& __obj) { return __insert_or_assign(std::move(__key), std::forward<_Mapped>(__obj)); } template requires __is_compare_transparent && is_constructible_v && is_assignable_v && is_constructible_v - _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(_Kp&& __key, _Mapped&& __obj) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + insert_or_assign(_Kp&& __key, _Mapped&& __obj) { return __insert_or_assign(std::forward<_Kp>(__key), std::forward<_Mapped>(__obj)); } template requires is_assignable_v && is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, const key_type& __key, _Mapped&& __obj) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + insert_or_assign(const_iterator __hint, const key_type& __key, _Mapped&& __obj) { return __insert_or_assign(__hint, __key, std::forward<_Mapped>(__obj)); } template requires is_assignable_v && is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, key_type&& __key, _Mapped&& __obj) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + insert_or_assign(const_iterator __hint, key_type&& __key, _Mapped&& __obj) { return __insert_or_assign(__hint, std::move(__key), std::forward<_Mapped>(__obj)); } template requires __is_compare_transparent && is_constructible_v && is_assignable_v && is_constructible_v - _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __obj) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __obj) { return __insert_or_assign(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__obj)); } - _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(iterator __position) { return __erase(__position.__key_iter_, __position.__mapped_iter_); } - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __position) { return __erase(__position.__key_iter_, __position.__mapped_iter_); } - _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(const key_type& __x) { auto __iter = find(__x); if (__iter != end()) { erase(__iter); @@ -680,14 +723,14 @@ public: template requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> && !is_convertible_v<_Kp &&, const_iterator>) - _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(_Kp&& __x) { auto [__first, __last] = equal_range(__x); auto __res = __last - __first; erase(__first, __last); return __res; } - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) { + 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __first, const_iterator __last) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); auto __key_it = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_); auto __mapped_it = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_); @@ -695,7 +738,7 @@ public: return iterator(std::move(__key_it), std::move(__mapped_it)); } - _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __y) noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_map& __y) noexcept { // warning: The spec has unconditional noexcept, which means that // if any of the following functions throw an exception, // std::terminate will be called. @@ -705,133 +748,156 @@ public: ranges::swap(__containers_.values, __y.__containers_.values); } - _LIBCPP_HIDE_FROM_ABI void clear() noexcept { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { __containers_.keys.clear(); __containers_.values.clear(); } // observers - _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; } - _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { + return value_compare(__compare_); + } - _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; } - _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const key_container_type& keys() const noexcept { + return __containers_.keys; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const mapped_container_type& values() const noexcept { + return __containers_.values; + } // map operations - _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { + return __find_impl(*this, __x); + } - _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } - - template - requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { return __find_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const { return contains(__x) ? 1 : 0; } - template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { return contains(__x) ? 
1 : 0; } - _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); } - template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { + return contains(__x) ? 1 : 0; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { return find(__x) != end(); } - _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound(*this, __x); } + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { + return find(__x) != end(); + } - _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { + return __lower_bound(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const key_type& __x) const { return __lower_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { return __lower_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { return __lower_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound(*this, __x); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { + return __upper_bound(*this, __x); + } - _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const key_type& __x) const { return __upper_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { return __upper_bound(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { return __upper_bound(*this, __x); } - _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const key_type& __x) { return __equal_range_impl(*this, __x); } - _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __x) const { return __equal_range_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const _Kp& __x) { return __equal_range_impl(*this, __x); } template requires __is_compare_transparent - _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) const { return __equal_range_impl(*this, __x); } - friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_map& __x, const flat_map& __y) { + 
friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator==(const flat_map& __x, const flat_map& __y) { return ranges::equal(__x, __y); } - friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_map& __x, const flat_map& __y) { + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 auto + operator<=>(const flat_map& __x, const flat_map& __y) { return std::lexicographical_compare_three_way( __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } - friend _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __x, flat_map& __y) noexcept { __x.swap(__y); } + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_map& __x, flat_map& __y) noexcept { + __x.swap(__y); + } private: struct __ctor_uses_allocator_tag { - explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default; + explicit _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __ctor_uses_allocator_tag() = default; }; struct __ctor_uses_allocator_empty_tag { - explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default; + explicit _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __ctor_uses_allocator_empty_tag() = default; }; template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI - flat_map(__ctor_uses_allocator_tag, - const _Allocator& __alloc, - _KeyCont&& __key_cont, - _MappedCont&& __mapped_cont, - _CompArg&&... __comp) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_map( + __ctor_uses_allocator_tag, + const _Allocator& __alloc, + _KeyCont&& __key_cont, + _MappedCont&& __mapped_cont, + _CompArg&&... __comp) : __containers_{.keys = std::make_obj_using_allocator( __alloc, std::forward<_KeyCont>(__key_cont)), .values = std::make_obj_using_allocator( @@ -840,12 +906,13 @@ private: template requires __allocator_ctor_constraint<_Allocator> - _LIBCPP_HIDE_FROM_ABI flat_map(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_map(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp) : __containers_{.keys = std::make_obj_using_allocator(__alloc), .values = std::make_obj_using_allocator(__alloc)}, __compare_(std::forward<_CompArg>(__comp)...) {} - _LIBCPP_HIDE_FROM_ABI bool __is_sorted_and_unique(auto&& __key_container) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted_and_unique(auto&& __key_container) const { auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); }; return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container); } @@ -853,7 +920,7 @@ private: // This function is only used in constructors. So there is not exception handling in this function. 
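In other words, the invariant that __is_sorted_and_unique verifies is that no adjacent pair of keys compares greater-or-equal, which is the same as requiring the keys to be strictly increasing under the comparator. A minimal standalone sketch of that check, assuming a plain std::ranges::adjacent_find over a vector (the free-function form below is illustrative only, not the member used above):

#include <algorithm>
#include <cassert>
#include <functional>
#include <ranges>
#include <vector>

// True when no adjacent pair is "greater or equal", i.e. the keys are
// strictly increasing under comp: sorted and free of duplicates.
template <class Range, class Compare = std::less<>>
bool is_sorted_and_unique(const Range& keys, Compare comp = {}) {
  auto greater_or_equal = [&](const auto& x, const auto& y) { return !comp(x, y); };
  return std::ranges::adjacent_find(keys, greater_or_equal) == std::ranges::end(keys);
}

int main() {
  assert(is_sorted_and_unique(std::vector{1, 2, 3}));
  assert(!is_sorted_and_unique(std::vector{1, 2, 2})); // duplicate key
  assert(!is_sorted_and_unique(std::vector{3, 1, 2})); // not sorted
}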
// If the function exits via an exception, there will be no flat_map object constructed, thus, there // is no invariant state to preserve - _LIBCPP_HIDE_FROM_ABI void __sort_and_unique() { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __sort_and_unique() { auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); ranges::sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); }); auto __dup_start = ranges::unique(__zv, __key_equiv(__compare_)).begin(); @@ -862,8 +929,17 @@ private: __containers_.values.erase(__containers_.values.begin() + __dist, __containers_.values.end()); } + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto + __corresponding_mapped_it(_Self&& __self, _KeyIter&& __key_iter) { + return __self.__containers_.values.begin() + + static_cast>( + ranges::distance(__self.__containers_.keys.begin(), __key_iter)); + } + template - _LIBCPP_HIDE_FROM_ABI void __append_sort_merge_unique(_InputIterator __first, _Sentinel __last) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + __append_sort_merge_unique(_InputIterator __first, _Sentinel __last) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); size_t __num_of_appended = __flat_map_utils::__append(*this, std::move(__first), std::move(__last)); if (__num_of_appended != 0) { @@ -891,7 +967,7 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __find_impl(_Self&& __self, const _Kp& __key) { auto __it = __self.lower_bound(__key); auto __last = __self.end(); if (__it == __last || __self.__compare_(__key, __it->first)) { @@ -901,8 +977,9 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static auto __key_equal_range(_Self&& __self, const _Kp& __key) { - auto __it = ranges::lower_bound(__self.__containers_.keys, __key, __self.__compare_); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __key_equal_range(_Self&& __self, const _Kp& __key) { + auto __it = + std::lower_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __key, __self.__compare_); auto __last = __self.__containers_.keys.end(); if (__it == __last || __self.__compare_(__key, *__it)) { return std::make_pair(__it, __it); @@ -911,44 +988,33 @@ private: } template - _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { auto [__key_first, __key_last] = __key_equal_range(__self, __key); - - const auto __make_mapped_iter = [&](const auto& __key_iter) { - return __self.__containers_.values.begin() + - static_cast>( - ranges::distance(__self.__containers_.keys.begin(), __key_iter)); - }; - - using __iterator_type = ranges::iterator_t; - return std::make_pair(__iterator_type(__key_first, __make_mapped_iter(__key_first)), - __iterator_type(__key_last, __make_mapped_iter(__key_last))); + using __iterator_type = ranges::iterator_t; + return std::make_pair(__iterator_type(__key_first, __corresponding_mapped_it(__self, __key_first)), + __iterator_type(__key_last, __corresponding_mapped_it(__self, __key_last))); } template - _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) { - return __binary_search<_Res>(__self, ranges::lower_bound, __x); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static _Res 
__lower_bound(_Self&& __self, _Kp& __x) { + auto __key_iter = + std::lower_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_); + auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter); + return _Res(std::move(__key_iter), std::move(__mapped_iter)); } template - _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) { - return __binary_search<_Res>(__self, ranges::upper_bound, __x); - } - - template - _LIBCPP_HIDE_FROM_ABI static _Res __binary_search(_Self&& __self, _Fn __search_fn, _Kp& __x) { - auto __key_iter = __search_fn(__self.__containers_.keys, __x, __self.__compare_); - auto __mapped_iter = - __self.__containers_.values.begin() + - static_cast>( - ranges::distance(__self.__containers_.keys.begin(), __key_iter)); - + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static _Res __upper_bound(_Self&& __self, _Kp& __x) { + auto __key_iter = + std::upper_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_); + auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter); return _Res(std::move(__key_iter), std::move(__mapped_iter)); } template - _LIBCPP_HIDE_FROM_ABI pair __try_emplace(_KeyArg&& __key, _MArgs&&... __mapped_args) { - auto __key_it = ranges::lower_bound(__containers_.keys, __key, __compare_); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + __try_emplace(_KeyArg&& __key, _MArgs&&... __mapped_args) { + auto __key_it = std::lower_bound(__containers_.keys.begin(), __containers_.keys.end(), __key, __compare_); auto __mapped_it = __containers_.values.begin() + ranges::distance(__containers_.keys.begin(), __key_it); if (__key_it == __containers_.keys.end() || __compare_(__key, *__key_it)) { @@ -966,7 +1032,7 @@ private: } template - _LIBCPP_HIDE_FROM_ABI bool __is_hint_correct(const_iterator __hint, _Kp&& __key) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_hint_correct(const_iterator __hint, _Kp&& __key) { if (__hint != cbegin() && !__compare_((__hint - 1)->first, __key)) { return false; } @@ -977,7 +1043,8 @@ private: } template - _LIBCPP_HIDE_FROM_ABI pair __try_emplace_hint(const_iterator __hint, _Kp&& __key, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + __try_emplace_hint(const_iterator __hint, _Kp&& __key, _Args&&... 
__args) { if (__is_hint_correct(__hint, __key)) { if (__hint == cend() || __compare_(__key, __hint->first)) { return {__flat_map_utils::__emplace_exact_pos( @@ -998,7 +1065,8 @@ private: } template - _LIBCPP_HIDE_FROM_ABI pair __insert_or_assign(_Kp&& __key, _Mapped&& __mapped) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + __insert_or_assign(_Kp&& __key, _Mapped&& __mapped) { auto __r = try_emplace(std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped)); if (!__r.second) { __r.first->second = std::forward<_Mapped>(__mapped); @@ -1007,7 +1075,8 @@ private: } template - _LIBCPP_HIDE_FROM_ABI iterator __insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __mapped) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + __insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __mapped) { auto __r = __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped)); if (!__r.second) { __r.first->second = std::forward<_Mapped>(__mapped); @@ -1015,18 +1084,19 @@ private: return __r.first; } - _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { - if constexpr (requires { __containers_.keys.reserve(__size); }) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __reserve(size_t __size) { + if constexpr (__container_traits<_KeyContainer>::__reservable) { __containers_.keys.reserve(__size); } - if constexpr (requires { __containers_.values.reserve(__size); }) { + if constexpr (__container_traits<_MappedContainer>::__reservable) { __containers_.values.reserve(__size); } } template - _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator + __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) { auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); auto __key_iter = __containers_.keys.erase(__key_iter_to_remove); auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove); @@ -1036,7 +1106,8 @@ private: template friend typename flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>::size_type - erase_if(flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate); + _LIBCPP_CONSTEXPR_SINCE_CXX26 + erase_if(flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate); friend __flat_map_utils; @@ -1044,8 +1115,9 @@ private: _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_; struct __key_equiv { - _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {} - _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator()(const_reference __x, const_reference __y) const { return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x)); } key_compare __comp_; @@ -1168,8 +1240,9 @@ struct uses_allocator && uses_allocator_v<_MappedContainer, _Allocator>> {}; template -_LIBCPP_HIDE_FROM_ABI typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type -erase_if(flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_map, _Predicate __pred) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type + erase_if(flat_map<_Key, _Tp, _Compare, _KeyContainer, 
_MappedContainer>& __flat_map, _Predicate __pred) { auto __zv = ranges::views::zip(__flat_map.__containers_.keys, __flat_map.__containers_.values); auto __first = __zv.begin(); auto __last = __zv.end(); diff --git a/lib/libcxx/include/__flat_map/flat_multimap.h b/lib/libcxx/include/__flat_map/flat_multimap.h index 15fcd7995a..0af6aac00c 100644 --- a/lib/libcxx/include/__flat_map/flat_multimap.h +++ b/lib/libcxx/include/__flat_map/flat_multimap.h @@ -10,18 +10,16 @@ #ifndef _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H #define _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H +#include <__algorithm/equal_range.h> #include <__algorithm/lexicographical_compare_three_way.h> +#include <__algorithm/lower_bound.h> #include <__algorithm/min.h> #include <__algorithm/ranges_equal.h> -#include <__algorithm/ranges_equal_range.h> #include <__algorithm/ranges_inplace_merge.h> #include <__algorithm/ranges_is_sorted.h> -#include <__algorithm/ranges_lower_bound.h> -#include <__algorithm/ranges_partition_point.h> #include <__algorithm/ranges_sort.h> -#include <__algorithm/ranges_unique.h> -#include <__algorithm/ranges_upper_bound.h> #include <__algorithm/remove_if.h> +#include <__algorithm/upper_bound.h> #include <__assert> #include <__compare/synth_three_way.h> #include <__concepts/convertible_to.h> @@ -443,7 +441,7 @@ public: is_move_constructible_v _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) { std::pair __pair(std::forward<_Args>(__args)...); - auto __key_it = ranges::upper_bound(__containers_.keys, __pair.first, __compare_); + auto __key_it = std::upper_bound(__containers_.keys.begin(), __containers_.keys.end(), __pair.first, __compare_); auto __mapped_it = __corresponding_mapped_it(*this, __key_it); return __flat_map_utils::__emplace_exact_pos( @@ -473,7 +471,7 @@ public: // | // hint // We want to insert "2" after the last existing "2" - __key_iter = ranges::upper_bound(__containers_.keys.begin(), __key_iter, __pair.first, __compare_); + __key_iter = std::upper_bound(__containers_.keys.begin(), __key_iter, __pair.first, __compare_); __mapped_iter = __corresponding_mapped_it(*this, __key_iter); } else { _LIBCPP_ASSERT_INTERNAL(!__prev_larger && __next_smaller, "this means that the multimap is not sorted"); @@ -485,7 +483,7 @@ public: // | // hint // We want to insert "2" before the first existing "2" - __key_iter = ranges::lower_bound(__key_iter, __containers_.keys.end(), __pair.first, __compare_); + __key_iter = std::lower_bound(__key_iter, __containers_.keys.end(), __pair.first, __compare_); __mapped_iter = __corresponding_mapped_it(*this, __key_iter); } return __flat_map_utils::__emplace_exact_pos( @@ -804,7 +802,8 @@ private: template _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { - auto [__key_first, __key_last] = ranges::equal_range(__self.__containers_.keys, __key, __self.__compare_); + auto [__key_first, __key_last] = + std::equal_range(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __key, __self.__compare_); using __iterator_type = ranges::iterator_t; return std::make_pair(__iterator_type(__key_first, __corresponding_mapped_it(__self, __key_first)), @@ -813,24 +812,26 @@ private: template _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) { - auto __key_iter = ranges::lower_bound(__self.__containers_.keys, __x, __self.__compare_); + auto __key_iter = + std::lower_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_); auto __mapped_iter = __corresponding_mapped_it(__self, 
__key_iter); return _Res(std::move(__key_iter), std::move(__mapped_iter)); } template _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) { - auto __key_iter = ranges::upper_bound(__self.__containers_.keys, __x, __self.__compare_); + auto __key_iter = + std::upper_bound(__self.__containers_.keys.begin(), __self.__containers_.keys.end(), __x, __self.__compare_); auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter); return _Res(std::move(__key_iter), std::move(__mapped_iter)); } _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { - if constexpr (requires { __containers_.keys.reserve(__size); }) { + if constexpr (__container_traits<_KeyContainer>::__reservable) { __containers_.keys.reserve(__size); } - if constexpr (requires { __containers_.values.reserve(__size); }) { + if constexpr (__container_traits<_MappedContainer>::__reservable) { __containers_.values.reserve(__size); } } diff --git a/lib/libcxx/include/__flat_map/key_value_iterator.h b/lib/libcxx/include/__flat_map/key_value_iterator.h index 3ebb653deb..d04a23d1f8 100644 --- a/lib/libcxx/include/__flat_map/key_value_iterator.h +++ b/lib/libcxx/include/__flat_map/key_value_iterator.h @@ -13,9 +13,12 @@ #include <__compare/three_way_comparable.h> #include <__concepts/convertible_to.h> #include <__config> +#include <__cstddef/size_t.h> #include <__iterator/iterator_traits.h> +#include <__iterator/product_iterator.h> #include <__memory/addressof.h> #include <__type_traits/conditional.h> +#include <__utility/forward.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -46,7 +49,7 @@ private: struct __arrow_proxy { __reference __ref_; - _LIBCPP_HIDE_FROM_ABI __reference* operator->() { return std::addressof(__ref_); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __reference* operator->() { return std::addressof(__ref_); } }; __key_iterator __key_iter_; @@ -57,6 +60,8 @@ private: template friend struct __key_value_iterator; + friend struct __product_iterator_traits<__key_value_iterator>; + public: using iterator_concept = random_access_iterator_tag; // `__key_value_iterator` only satisfy "Cpp17InputIterator" named requirements, because @@ -69,104 +74,141 @@ public: _LIBCPP_HIDE_FROM_ABI __key_value_iterator() = default; - _LIBCPP_HIDE_FROM_ABI __key_value_iterator(__key_value_iterator<_Owner, _KeyContainer, _MappedContainer, !_Const> __i) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + __key_value_iterator(__key_value_iterator<_Owner, _KeyContainer, _MappedContainer, !_Const> __i) requires _Const && convertible_to && convertible_to : __key_iter_(std::move(__i.__key_iter_)), __mapped_iter_(std::move(__i.__mapped_iter_)) {} - _LIBCPP_HIDE_FROM_ABI __key_value_iterator(__key_iterator __key_iter, __mapped_iterator __mapped_iter) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + __key_value_iterator(__key_iterator __key_iter, __mapped_iterator __mapped_iter) : __key_iter_(std::move(__key_iter)), __mapped_iter_(std::move(__mapped_iter)) {} - _LIBCPP_HIDE_FROM_ABI __reference operator*() const { return __reference(*__key_iter_, *__mapped_iter_); } - _LIBCPP_HIDE_FROM_ABI __arrow_proxy operator->() const { return __arrow_proxy{**this}; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __reference operator*() const { + return __reference(*__key_iter_, *__mapped_iter_); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __arrow_proxy operator->() const { return __arrow_proxy{**this}; } - _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator++() { + _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 __key_value_iterator& operator++() { ++__key_iter_; ++__mapped_iter_; return *this; } - _LIBCPP_HIDE_FROM_ABI __key_value_iterator operator++(int) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_value_iterator operator++(int) { __key_value_iterator __tmp(*this); ++*this; return __tmp; } - _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator--() { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_value_iterator& operator--() { --__key_iter_; --__mapped_iter_; return *this; } - _LIBCPP_HIDE_FROM_ABI __key_value_iterator operator--(int) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_value_iterator operator--(int) { __key_value_iterator __tmp(*this); --*this; return __tmp; } - _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator+=(difference_type __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_value_iterator& operator+=(difference_type __x) { __key_iter_ += __x; __mapped_iter_ += __x; return *this; } - _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator-=(difference_type __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_value_iterator& operator-=(difference_type __x) { __key_iter_ -= __x; __mapped_iter_ -= __x; return *this; } - _LIBCPP_HIDE_FROM_ABI __reference operator[](difference_type __n) const { return *(*this + __n); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __reference operator[](difference_type __n) const { + return *(*this + __n); + } - _LIBCPP_HIDE_FROM_ABI friend constexpr bool + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend bool operator==(const __key_value_iterator& __x, const __key_value_iterator& __y) { return __x.__key_iter_ == __y.__key_iter_; } - _LIBCPP_HIDE_FROM_ABI friend bool operator<(const __key_value_iterator& __x, const __key_value_iterator& __y) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend bool + operator<(const __key_value_iterator& __x, const __key_value_iterator& __y) { return __x.__key_iter_ < __y.__key_iter_; } - _LIBCPP_HIDE_FROM_ABI friend bool operator>(const __key_value_iterator& __x, const __key_value_iterator& __y) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend bool + operator>(const __key_value_iterator& __x, const __key_value_iterator& __y) { return __y < __x; } - _LIBCPP_HIDE_FROM_ABI friend bool operator<=(const __key_value_iterator& __x, const __key_value_iterator& __y) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend bool + operator<=(const __key_value_iterator& __x, const __key_value_iterator& __y) { return !(__y < __x); } - _LIBCPP_HIDE_FROM_ABI friend bool operator>=(const __key_value_iterator& __x, const __key_value_iterator& __y) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend bool + operator>=(const __key_value_iterator& __x, const __key_value_iterator& __y) { return !(__x < __y); } - _LIBCPP_HIDE_FROM_ABI friend auto operator<=>(const __key_value_iterator& __x, const __key_value_iterator& __y) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend auto + operator<=>(const __key_value_iterator& __x, const __key_value_iterator& __y) requires three_way_comparable<__key_iterator> { return __x.__key_iter_ <=> __y.__key_iter_; } - _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator+(const __key_value_iterator& __i, difference_type __n) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend __key_value_iterator + operator+(const __key_value_iterator& __i, difference_type __n) { auto __tmp = __i; __tmp += __n; return __tmp; } - 
_LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator+(difference_type __n, const __key_value_iterator& __i) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend __key_value_iterator + operator+(difference_type __n, const __key_value_iterator& __i) { return __i + __n; } - _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator-(const __key_value_iterator& __i, difference_type __n) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend __key_value_iterator + operator-(const __key_value_iterator& __i, difference_type __n) { auto __tmp = __i; __tmp -= __n; return __tmp; } - _LIBCPP_HIDE_FROM_ABI friend difference_type + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 friend difference_type operator-(const __key_value_iterator& __x, const __key_value_iterator& __y) { return difference_type(__x.__key_iter_ - __y.__key_iter_); } }; +template +struct __product_iterator_traits<__key_value_iterator<_Owner, _KeyContainer, _MappedContainer, _Const>> { + static constexpr size_t __size = 2; + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static decltype(auto) __get_iterator_element(_Iter&& __it) + requires(_Nth <= 1) + { + if constexpr (_Nth == 0) { + return std::forward<_Iter>(__it).__key_iter_; + } else { + return std::forward<_Iter>(__it).__mapped_iter_; + } + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto + __make_product_iterator(_KeyIter&& __key_iter, _MappedIter&& __mapped_iter) { + return __key_value_iterator<_Owner, _KeyContainer, _MappedContainer, _Const>( + std::forward<_KeyIter>(__key_iter), std::forward<_MappedIter>(__mapped_iter)); + } +}; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__flat_map/utils.h b/lib/libcxx/include/__flat_map/utils.h index acb7dca7ff..3a05c71566 100644 --- a/lib/libcxx/include/__flat_map/utils.h +++ b/lib/libcxx/include/__flat_map/utils.h @@ -11,6 +11,7 @@ #define _LIBCPP___FLAT_MAP_UTILS_H #include <__config> +#include <__iterator/product_iterator.h> #include <__type_traits/container_traits.h> #include <__utility/exception_guard.h> #include <__utility/forward.h> @@ -35,7 +36,7 @@ struct __flat_map_utils { // roll back the changes it made to the map. If it cannot roll back the changes, it will // clear the map. template - _LIBCPP_HIDE_FROM_ABI static typename _Map::iterator __emplace_exact_pos( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static typename _Map::iterator __emplace_exact_pos( _Map& __map, _IterK&& __it_key, _IterM&& __it_mapped, _KeyArg&& __key, _MArgs&&... 
__mapped_args) { auto __on_key_failed = std::__make_exception_guard([&]() noexcept { using _KeyContainer = typename _Map::key_container_type; @@ -79,10 +80,8 @@ struct __flat_map_utils { return typename _Map::iterator(std::move(__key_it), std::move(__mapped_it)); } - // TODO: We could optimize this, see - // https://github.com/llvm/llvm-project/issues/108624 template - _LIBCPP_HIDE_FROM_ABI static typename _Map::size_type + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static typename _Map::size_type __append(_Map& __map, _InputIterator __first, _Sentinel __last) { typename _Map::size_type __num_appended = 0; for (; __first != __last; ++__first) { @@ -93,6 +92,25 @@ struct __flat_map_utils { } return __num_appended; } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static typename _Map::size_type + __append(_Map& __map, _InputIterator __first, _InputIterator __last) + requires __is_product_iterator_of_size<_InputIterator, 2>::value + { + auto __s1 = __map.__containers_.keys.size(); + __map.__containers_.keys.insert( + __map.__containers_.keys.end(), + __product_iterator_traits<_InputIterator>::template __get_iterator_element<0>(__first), + __product_iterator_traits<_InputIterator>::template __get_iterator_element<0>(__last)); + + __map.__containers_.values.insert( + __map.__containers_.values.end(), + __product_iterator_traits<_InputIterator>::template __get_iterator_element<1>(__first), + __product_iterator_traits<_InputIterator>::template __get_iterator_element<1>(__last)); + + return __map.__containers_.keys.size() - __s1; + } }; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__flat_set/flat_multiset.h b/lib/libcxx/include/__flat_set/flat_multiset.h new file mode 100644 index 0000000000..44d8af05a5 --- /dev/null +++ b/lib/libcxx/include/__flat_set/flat_multiset.h @@ -0,0 +1,792 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
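For context on the product-iterator __append overload added above: when the source iterator is known to be a zip of a key iterator and a mapped iterator, the append can be performed as two bulk container inserts rather than one element at a time. A rough sketch of that idea on plain vectors, with purely illustrative names (append_split is not a libc++ function):

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Append parallel key/value ranges with two bulk inserts; returns how many
// elements were appended (mirrors the keys.size() delta computed above).
template <class K, class V>
std::size_t append_split(std::vector<K>& keys, std::vector<V>& values,
                         const std::vector<K>& src_keys, const std::vector<V>& src_values) {
  std::size_t before = keys.size();
  keys.insert(keys.end(), src_keys.begin(), src_keys.end());         // bulk key insert
  values.insert(values.end(), src_values.begin(), src_values.end()); // bulk value insert
  return keys.size() - before;
}

int main() {
  std::vector<int> keys{1, 2};
  std::vector<std::string> values{"a", "b"};
  std::size_t n = append_split(keys, values, {3, 4}, {"c", "d"});
  assert(n == 2 && keys.size() == 4 && values.size() == 4);
}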
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_MAP_FLAT_MULTISET_H +#define _LIBCPP___FLAT_MAP_FLAT_MULTISET_H + +#include <__algorithm/equal_range.h> +#include <__algorithm/lexicographical_compare_three_way.h> +#include <__algorithm/lower_bound.h> +#include <__algorithm/min.h> +#include <__algorithm/ranges_equal.h> +#include <__algorithm/ranges_inplace_merge.h> +#include <__algorithm/ranges_is_sorted.h> +#include <__algorithm/ranges_sort.h> +#include <__algorithm/ranges_unique.h> +#include <__algorithm/remove_if.h> +#include <__algorithm/upper_bound.h> +#include <__assert> +#include <__compare/synth_three_way.h> +#include <__concepts/convertible_to.h> +#include <__concepts/swappable.h> +#include <__config> +#include <__cstddef/byte.h> +#include <__cstddef/ptrdiff_t.h> +#include <__flat_map/key_value_iterator.h> +#include <__flat_map/sorted_equivalent.h> +#include <__flat_set/ra_iterator.h> +#include <__flat_set/utils.h> +#include <__functional/invoke.h> +#include <__functional/is_transparent.h> +#include <__functional/operations.h> +#include <__fwd/vector.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/prev.h> +#include <__iterator/ranges_iterator_traits.h> +#include <__iterator/reverse_iterator.h> +#include <__memory/allocator_traits.h> +#include <__memory/uses_allocator.h> +#include <__memory/uses_allocator_construction.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/container_compatible_range.h> +#include <__ranges/drop_view.h> +#include <__ranges/from_range.h> +#include <__ranges/ref_view.h> +#include <__ranges/size.h> +#include <__ranges/subrange.h> +#include <__ranges/zip_view.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/container_traits.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_allocator.h> +#include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_same.h> +#include <__type_traits/maybe_const.h> +#include <__utility/as_const.h> +#include <__utility/exception_guard.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include <__utility/scope_guard.h> +#include <__vector/vector.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , class _KeyContainer = vector<_Key>> +class flat_multiset { + template + friend class flat_multiset; + + friend __flat_set_utils; + + static_assert(is_same_v<_Key, typename _KeyContainer::value_type>); + static_assert(!is_same_v<_KeyContainer, std::vector>, "vector is not a sequence container"); + +public: + // types + using key_type = _Key; + using value_type = _Key; + using key_compare = __type_identity_t<_Compare>; + using value_compare = _Compare; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = typename _KeyContainer::size_type; + using difference_type = typename _KeyContainer::difference_type; + using iterator = __ra_iterator; + using const_iterator = iterator; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using container_type = _KeyContainer; + +public: + // [flat.multiset.cons], constructors + _LIBCPP_HIDE_FROM_ABI flat_multiset() 
noexcept(is_nothrow_default_constructible_v<_KeyContainer> && + is_nothrow_default_constructible_v<_Compare>) + : __keys_(), __compare_() {} + + _LIBCPP_HIDE_FROM_ABI flat_multiset(const flat_multiset&) = default; + + // The copy/move constructors are not specified in the spec, which means they should be defaulted. + // However, the move constructor can potentially leave a moved-from object in an inconsistent + // state if an exception is thrown. + _LIBCPP_HIDE_FROM_ABI flat_multiset(flat_multiset&& __other) noexcept( + is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_Compare>) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : __keys_(std::move(__other.__keys_)), __compare_(std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) { + __other.clear(); + // gcc does not like the `throw` keyword in a conditionally noexcept function + if constexpr (!(is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_Compare>)) { + throw; + } +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(const key_compare& __comp) : __keys_(), __compare_(__comp) {} + + _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(container_type __keys, const key_compare& __comp = key_compare()) + : __keys_(std::move(__keys)), __compare_(__comp) { + ranges::sort(__keys_, __compare_); + } + + _LIBCPP_HIDE_FROM_ABI + flat_multiset(sorted_equivalent_t, container_type __keys, const key_compare& __comp = key_compare()) + : __keys_(std::move(__keys)), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI + flat_multiset(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __keys_(), __compare_(__comp) { + insert(__first, __last); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI flat_multiset( + sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __keys_(__first, __last), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t __fr, _Range&& __rg) + : flat_multiset(__fr, std::forward<_Range>(__rg), key_compare()) {} + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_multiset(__comp) { + insert_range(std::forward<_Range>(__rg)); + } + + _LIBCPP_HIDE_FROM_ABI flat_multiset(initializer_list __il, const key_compare& __comp = key_compare()) + : flat_multiset(__il.begin(), __il.end(), __comp) {} + + _LIBCPP_HIDE_FROM_ABI + flat_multiset(sorted_equivalent_t, initializer_list __il, const key_compare& __comp = key_compare()) + : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __comp) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI explicit flat_multiset(const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), 
__compare_(__comp) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(const container_type& __keys, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_() { + ranges::sort(__keys_, __compare_); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI + flat_multiset(const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_(__comp) { + ranges::sort(__keys_, __compare_); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(sorted_equivalent_t, const container_type& __keys, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_() { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI + flat_multiset(sorted_equivalent_t, const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(const flat_multiset& __other, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __other.__keys_)), + __compare_(__other.__compare_) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(flat_multiset&& __other, const _Allocator& __alloc) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : __keys_(std::make_obj_using_allocator(__alloc, std::move(__other.__keys_))), + __compare_(std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) 
{ + __other.clear(); + throw; +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI flat_multiset(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI + flat_multiset(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI + flat_multiset(sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __first, __last)), __compare_() { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI + flat_multiset(sorted_equivalent_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __first, __last)), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys_, __compare_), "Key container is not sorted"); + } + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(initializer_list __il, const _Allocator& __alloc) + : flat_multiset(__il.begin(), __il.end(), __alloc) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI + flat_multiset(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_multiset(__il.begin(), __il.end(), __comp, __alloc) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset(sorted_equivalent_t, initializer_list __il, const _Allocator& __alloc) + : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __alloc) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI flat_multiset( + sorted_equivalent_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_multiset(sorted_equivalent, __il.begin(), __il.end(), __comp, __alloc) {} + + _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(initializer_list __il) { + clear(); + insert(__il); + return *this; + } + + // copy/move assignment are not specified in the spec (defaulted) + // but move assignment can potentially leave moved from object in an inconsistent + // state if an exception is thrown + _LIBCPP_HIDE_FROM_ABI 
flat_multiset& operator=(const flat_multiset&) = default; + + _LIBCPP_HIDE_FROM_ABI flat_multiset& operator=(flat_multiset&& __other) noexcept( + is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_Compare>) { + auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; }); + auto __clear_self_guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __keys_ = std::move(__other.__keys_); + __compare_ = std::move(__other.__compare_); + __clear_self_guard.__complete(); + return *this; + } + + // iterators + _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { return iterator(std::as_const(__keys_).begin()); } + _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { return const_iterator(__keys_.begin()); } + _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { return iterator(std::as_const(__keys_).end()); } + _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { return const_iterator(__keys_.end()); } + + _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + + _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); } + _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + + // capacity + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __keys_.empty(); } + _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __keys_.size(); } + _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { return __keys_.max_size(); } + + // [flat.multiset.modifiers], modifiers + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) { + if constexpr (sizeof...(__args) == 1 && (is_same_v, _Key> && ...)) { + return __emplace(std::forward<_Args>(__args)...); + } else { + return __emplace(_Key(std::forward<_Args>(__args)...)); + } + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... 
__args) { + if constexpr (sizeof...(__args) == 1 && (is_same_v, _Key> && ...)) { + return __emplace_hint(std::move(__hint), std::forward<_Args>(__args)...); + } else { + return __emplace_hint(std::move(__hint), _Key(std::forward<_Args>(__args)...)); + } + } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return emplace(__x); } + + _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __x) { return emplace(std::move(__x)); } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) { + return emplace_hint(__hint, __x); + } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) { + return emplace_hint(__hint, std::move(__x)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + __append_sort_merge(std::move(__first), std::move(__last)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + + __append_sort_merge(std::move(__first), std::move(__last)); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } + + __append_sort_merge(std::forward<_Range>(__range)); + } + + _LIBCPP_HIDE_FROM_ABI void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + + _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, initializer_list __il) { + insert(sorted_equivalent, __il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI container_type extract() && { + auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); + auto __ret = std::move(__keys_); + return __ret; + } + + _LIBCPP_HIDE_FROM_ABI void replace(container_type&& __keys) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(ranges::is_sorted(__keys, __compare_), "Key container is not sorted"); + auto __guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __keys_ = std::move(__keys); + __guard.__complete(); + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_iter = __keys_.erase(__position.__base()); + __on_failure.__complete(); + return iterator(__key_iter); + } + + // The following overload is the same as the iterator overload + // iterator erase(const_iterator __position); + + _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) { + auto [__first, __last] = equal_range(__x); + auto __res = __last - __first; + erase(__first, __last); + return __res; + } + + template + requires(__is_transparent_v<_Compare> && !is_convertible_v<_Kp &&, iterator> && + !is_convertible_v<_Kp &&, const_iterator>) + _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) { + auto [__first, __last] = equal_range(__x); + auto __res = __last - __first; + erase(__first, __last); + return __res; + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_it = __keys_.erase(__first.__base(), 
__last.__base()); + __on_failure.__complete(); + return iterator(std::move(__key_it)); + } + + _LIBCPP_HIDE_FROM_ABI void swap(flat_multiset& __y) noexcept { + // warning: The spec has unconditional noexcept, which means that + // if any of the following functions throw an exception, + // std::terminate will be called + // This is discussed in P3567, which hasn't been voted on yet. + ranges::swap(__compare_, __y.__compare_); + ranges::swap(__keys_, __y.__keys_); + } + + _LIBCPP_HIDE_FROM_ABI void clear() noexcept { __keys_.clear(); } + + // observers + _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; } + _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return __compare_; } + + // map operations + _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) { + return __find_impl(*this, __x); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const { + auto [__first, __last] = equal_range(__x); + return __last - __first; + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const { + auto [__first, __last] = equal_range(__x); + return __last - __first; + } + + _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const { + return find(__x) != end(); + } + + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { + const auto& __keys = __keys_; + return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const { + return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) { + const auto& __keys = __keys_; + return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const { + return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { + const auto& __keys = __keys_; + return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const { + return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) { + const auto& __keys = __keys_; + return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const { + return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) { + 
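+    // For flat_multiset the result is the half-open range
+    // [lower_bound(__x), upper_bound(__x)), which may span several equivalent keys.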
return __equal_range_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) const { + return __equal_range_impl(*this, __x); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) { + return __equal_range_impl(*this, __x); + } + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) const { + return __equal_range_impl(*this, __x); + } + + friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_multiset& __x, const flat_multiset& __y) { + return ranges::equal(__x, __y); + } + + friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_multiset& __x, const flat_multiset& __y) { + return std::lexicographical_compare_three_way( + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + } + + friend _LIBCPP_HIDE_FROM_ABI void swap(flat_multiset& __x, flat_multiset& __y) noexcept { __x.swap(__y); } + +private: + template + _LIBCPP_HIDE_FROM_ABI void __append_sort_merge(_Args&&... __args) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + size_type __old_size = size(); + __flat_set_utils::__append(*this, std::forward<_Args>(__args)...); + if constexpr (!_WasSorted) { + ranges::sort(__keys_.begin() + __old_size, __keys_.end(), __compare_); + } else { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + ranges::is_sorted(__keys_ | ranges::views::drop(__old_size)), "Key container is not sorted"); + } + ranges::inplace_merge(__keys_.begin(), __keys_.begin() + __old_size, __keys_.end(), __compare_); + __on_failure.__complete(); + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __emplace(_Kp&& __key) { + auto __it = upper_bound(__key); + return __flat_set_utils::__emplace_exact_pos(*this, __it, std::forward<_Kp>(__key)); + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint(const_iterator __hint, _Kp&& __key) { + auto __prev_larger = __hint != cbegin() && __compare_(__key, *std::prev(__hint)); + auto __next_smaller = __hint != cend() && __compare_(*__hint, __key); + + if (!__prev_larger && !__next_smaller) [[likely]] { + // hint correct, just use exact hint iterator + } else if (__prev_larger && !__next_smaller) { + // the hint position is more to the right than the key should have been. + // we want to emplace the element to a position as right as possible + // e.g. Insert new element "2" in the following range + // 1, 1, 2, 2, 2, 3, 4, 6 + // ^ + // | + // hint + // We want to insert "2" after the last existing "2" + __hint = std::upper_bound(begin(), __hint, __key, __compare_); + } else { + _LIBCPP_ASSERT_INTERNAL(!__prev_larger && __next_smaller, "this means that the multiset is not sorted"); + + // the hint position is more to the left than the key should have been. 
+ // we want to emplace the element to a position as left as possible + // 1, 1, 2, 2, 2, 3, 4, 6 + // ^ + // | + // hint + // We want to insert "2" before the first existing "2" + __hint = std::lower_bound(__hint, end(), __key, __compare_); + } + return __flat_set_utils::__emplace_exact_pos(*this, __hint, std::forward<_Kp>(__key)); + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) { + auto __it = __self.lower_bound(__key); + auto __last = __self.end(); + if (__it == __last || __self.__compare_(__key, *__it)) { + return __last; + } + return __it; + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { + using __iter = _If>, const_iterator, iterator>; + auto [__key_first, __key_last] = + std::equal_range(__self.__keys_.begin(), __self.__keys_.end(), __key, __self.__compare_); + return std::make_pair(__iter(__key_first), __iter(__key_last)); + } + + _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { + if constexpr (__container_traits<_KeyContainer>::__reservable) { + __keys_.reserve(__size); + } + } + + template + friend typename flat_multiset<_Key2, _Compare2, _KeyContainer2>::size_type + erase_if(flat_multiset<_Key2, _Compare2, _KeyContainer2>&, _Predicate); + + _KeyContainer __keys_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_; + + struct __key_equiv { + _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x)); + } + key_compare __comp_; + }; +}; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + is_invocable_v) +flat_multiset(_KeyContainer, _Compare = _Compare()) + -> flat_multiset; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value) +flat_multiset(_KeyContainer, _Allocator) + -> flat_multiset, _KeyContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + uses_allocator_v<_KeyContainer, _Allocator> && + is_invocable_v) +flat_multiset(_KeyContainer, _Compare, _Allocator) + -> flat_multiset; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + is_invocable_v) +flat_multiset(sorted_equivalent_t, _KeyContainer, _Compare = _Compare()) + -> flat_multiset; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value) +flat_multiset(sorted_equivalent_t, _KeyContainer, _Allocator) + -> flat_multiset, _KeyContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + uses_allocator_v<_KeyContainer, _Allocator> && + is_invocable_v) +flat_multiset(sorted_equivalent_t, _KeyContainer, _Compare, _Allocator) + -> flat_multiset; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_multiset(_InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_multiset<__iter_value_type<_InputIterator>, _Compare>; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_multiset(sorted_equivalent_t, _InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_multiset<__iter_value_type<_InputIterator>, _Compare>; + +template >, + class _Allocator = 
allocator>, + class = __enable_if_t::value && __is_allocator<_Allocator>::value>> +flat_multiset(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_multiset< + ranges::range_value_t<_Range>, + _Compare, + vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; + +template ::value>> +flat_multiset(from_range_t, _Range&&, _Allocator) -> flat_multiset< + ranges::range_value_t<_Range>, + less>, + vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_multiset(initializer_list<_Key>, _Compare = _Compare()) -> flat_multiset<_Key, _Compare>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_multiset(sorted_equivalent_t, initializer_list<_Key>, _Compare = _Compare()) -> flat_multiset<_Key, _Compare>; + +template +struct uses_allocator, _Allocator> + : bool_constant > {}; + +template +_LIBCPP_HIDE_FROM_ABI typename flat_multiset<_Key, _Compare, _KeyContainer>::size_type +erase_if(flat_multiset<_Key, _Compare, _KeyContainer>& __flat_multiset, _Predicate __pred) { + auto __guard = std::__make_exception_guard([&] { __flat_multiset.clear(); }); + auto __it = + std::remove_if(__flat_multiset.__keys_.begin(), __flat_multiset.__keys_.end(), [&](const auto& __e) -> bool { + return static_cast(__pred(__e)); + }); + auto __res = __flat_multiset.__keys_.end() - __it; + __flat_multiset.__keys_.erase(__it, __flat_multiset.__keys_.end()); + __guard.__complete(); + return __res; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FLAT_MAP_FLAT_MULTISET_H diff --git a/lib/libcxx/include/__flat_set/flat_set.h b/lib/libcxx/include/__flat_set/flat_set.h new file mode 100644 index 0000000000..95cb998459 --- /dev/null +++ b/lib/libcxx/include/__flat_set/flat_set.h @@ -0,0 +1,874 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_SET_FLAT_SET_H +#define _LIBCPP___FLAT_SET_FLAT_SET_H + +#include <__algorithm/lexicographical_compare_three_way.h> +#include <__algorithm/lower_bound.h> +#include <__algorithm/min.h> +#include <__algorithm/ranges_adjacent_find.h> +#include <__algorithm/ranges_equal.h> +#include <__algorithm/ranges_inplace_merge.h> +#include <__algorithm/ranges_sort.h> +#include <__algorithm/ranges_unique.h> +#include <__algorithm/remove_if.h> +#include <__algorithm/upper_bound.h> +#include <__assert> +#include <__compare/synth_three_way.h> +#include <__concepts/swappable.h> +#include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__flat_map/sorted_unique.h> +#include <__flat_set/ra_iterator.h> +#include <__flat_set/utils.h> +#include <__functional/invoke.h> +#include <__functional/is_transparent.h> +#include <__functional/operations.h> +#include <__fwd/vector.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/prev.h> +#include <__iterator/ranges_iterator_traits.h> +#include <__iterator/reverse_iterator.h> +#include <__memory/allocator_traits.h> +#include <__memory/uses_allocator.h> +#include <__memory/uses_allocator_construction.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/container_compatible_range.h> +#include <__ranges/drop_view.h> +#include <__ranges/from_range.h> +#include <__ranges/ref_view.h> +#include <__ranges/size.h> +#include <__ranges/subrange.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/container_traits.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_allocator.h> +#include <__type_traits/is_const.h> +#include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_same.h> +#include <__type_traits/remove_reference.h> +#include <__utility/as_const.h> +#include <__utility/exception_guard.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include <__utility/scope_guard.h> +#include <__vector/vector.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , class _KeyContainer = vector<_Key>> +class flat_set { + template + friend class flat_set; + + friend __flat_set_utils; + + static_assert(is_same_v<_Key, typename _KeyContainer::value_type>); + static_assert(!is_same_v<_KeyContainer, std::vector>, "vector is not a sequence container"); + + using __key_iterator _LIBCPP_NODEBUG = typename _KeyContainer::const_iterator; + +public: + // types + using key_type = _Key; + using value_type = _Key; + using key_compare = __type_identity_t<_Compare>; + using value_compare = _Compare; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = typename _KeyContainer::size_type; + using difference_type = typename _KeyContainer::difference_type; + using iterator = __ra_iterator; + using const_iterator = iterator; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using container_type = _KeyContainer; + +public: + // [flat.set.cons], construct/copy/destroy + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set() 
noexcept(is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_Compare>) + : __keys_(), __compare_() {} + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(const flat_set&) = default; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(flat_set&& __other) noexcept( + is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_Compare>) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : __keys_(std::move(__other.__keys_)), __compare_(std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) { + __other.clear(); + // gcc does not like the `throw` keyword in a conditionally noexcept function + if constexpr (!(is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_Compare>)) { + throw; + } +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_set(const key_compare& __comp) + : __keys_(), __compare_(__comp) {} + + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_set(container_type __keys, const key_compare& __comp = key_compare()) + : __keys_(std::move(__keys)), __compare_(__comp) { + __sort_and_unique(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, container_type __keys, const key_compare& __comp = key_compare()) + : __keys_(std::move(__keys)), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__keys_), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __keys_(), __compare_(__comp) { + insert(__first, __last); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __keys_(__first, __last), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__keys_), "Either the key container is not sorted or it contains duplicates"); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(from_range_t, _Range&& __rg) + : flat_set(from_range, std::forward<_Range>(__rg), key_compare()) {} + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(from_range_t, _Range&& __rg, const key_compare& __comp) + : flat_set(__comp) { + insert_range(std::forward<_Range>(__rg)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(initializer_list __il, const key_compare& __comp = key_compare()) + : flat_set(__il.begin(), __il.end(), __comp) {} + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, initializer_list __il, const key_compare& __comp = key_compare()) + : flat_set(sorted_unique, __il.begin(), __il.end(), __comp) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit flat_set(const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(const key_compare& 
__comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(const container_type& __keys, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_() { + __sort_and_unique(); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_(__comp) { + __sort_and_unique(); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, const container_type& __keys, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_() { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__keys_), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, const container_type& __keys, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __keys)), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__keys_), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(const flat_set& __other, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __other.__keys_)), + __compare_(__other.__compare_) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(flat_set&& __other, const _Allocator& __alloc) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : __keys_(std::make_obj_using_allocator(__alloc, std::move(__other.__keys_))), + __compare_(std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) 
{ + __other.clear(); + throw; +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __first, __last)), __compare_() { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__keys_), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && uses_allocator::value) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set( + sorted_unique_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc, __first, __last)), __compare_(__comp) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__keys_), "Either the key container is not sorted or it contains duplicates"); + } + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set(from_range_t, _Range&& __rg, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_() { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + : __keys_(std::make_obj_using_allocator(__alloc)), __compare_(__comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(initializer_list __il, const _Allocator& __alloc) + : flat_set(__il.begin(), __il.end(), __alloc) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_set(__il.begin(), __il.end(), __comp, __alloc) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, initializer_list __il, const _Allocator& __alloc) + : flat_set(sorted_unique, __il.begin(), __il.end(), __alloc) {} + + template + requires uses_allocator::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + flat_set(sorted_unique_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_set(sorted_unique, __il.begin(), __il.end(), __comp, __alloc) {} + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 
flat_set& operator=(initializer_list __il) { + clear(); + insert(__il); + return *this; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set& operator=(const flat_set&) = default; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 flat_set& operator=(flat_set&& __other) noexcept( + is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_Compare>) { + // No matter what happens, we always want to clear the other container before returning + // since we moved from it + auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; }); + { + // If an exception is thrown, we have no choice but to clear *this to preserve invariants + auto __on_exception = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __keys_ = std::move(__other.__keys_); + __compare_ = std::move(__other.__compare_); + __on_exception.__complete(); + } + return *this; + } + + // iterators + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() noexcept { + return iterator(std::as_const(__keys_).begin()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const noexcept { + return const_iterator(__keys_.begin()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() noexcept { + return iterator(std::as_const(__keys_).end()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const noexcept { + return const_iterator(__keys_.end()); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() noexcept { + return reverse_iterator(end()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() noexcept { + return reverse_iterator(begin()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const noexcept { return begin(); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const noexcept { return end(); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(end()); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(begin()); + } + + // [flat.set.capacity], capacity + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool empty() const noexcept { + return __keys_.empty(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const noexcept { return __keys_.size(); } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const noexcept { return __keys_.max_size(); } + + // [flat.set.modifiers], modifiers + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair emplace(_Args&&... __args) { + if constexpr (sizeof...(__args) == 1 && (is_same_v, _Key> && ...)) { + return __emplace(std::forward<_Args>(__args)...); + } else { + return __emplace(_Key(std::forward<_Args>(__args)...)); + } + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace_hint(const_iterator __hint, _Args&&... 
__args) { + if constexpr (sizeof...(__args) == 1 && (is_same_v, _Key> && ...)) { + return __emplace_hint(std::move(__hint), std::forward<_Args>(__args)...); + } else { + return __emplace_hint(std::move(__hint), _Key(std::forward<_Args>(__args)...)); + } + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair insert(const value_type& __x) { + return emplace(__x); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair insert(value_type&& __x) { + return emplace(std::move(__x)); + } + + template + requires(__is_transparent_v<_Compare> && is_constructible_v) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair insert(_Kp&& __x) { + return __emplace(std::forward<_Kp>(__x)); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, const value_type& __x) { + return emplace_hint(__hint, __x); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, value_type&& __x) { + return emplace_hint(__hint, std::move(__x)); + } + + template + requires(__is_transparent_v<_Compare> && is_constructible_v) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, _Kp&& __x) { + return __emplace_hint(__hint, std::forward<_Kp>(__x)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(_InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + __append_sort_merge_unique(std::move(__first), std::move(__last)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + insert(sorted_unique_t, _InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + + __append_sort_merge_unique(std::move(__first), std::move(__last)); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(_Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } + + __append_sort_merge_unique(std::forward<_Range>(__range)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list __il) { + insert(__il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(sorted_unique_t, initializer_list __il) { + insert(sorted_unique, __il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 container_type extract() && { + auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); + auto __ret = std::move(__keys_); + return __ret; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void replace(container_type&& __keys) { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__keys), "Either the key container is not sorted or it contains duplicates"); + auto __guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __keys_ = std::move(__keys); + __guard.__complete(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(iterator __position) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_iter = __keys_.erase(__position.__base()); + __on_failure.__complete(); + return iterator(__key_iter); + } + + 
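+  // Illustrative usage sketch (assumes C++23 and the <flat_set> header; the names below are
+  // examples only, not part of the interface above):
+  //
+  //   std::flat_set<int> __s{3, 1, 2, 2};      // keys are sorted and deduplicated: {1, 2, 3}
+  //   auto [__it, __inserted] = __s.insert(2); // __inserted == false, *__it == 2
+  //   __s.erase(__s.begin());                  // removes the smallest key, leaving {2, 3}
+  //
+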
// The following overload is the same as the iterator overload + // iterator erase(const_iterator __position); + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(const key_type& __x) { + auto __iter = find(__x); + if (__iter != end()) { + erase(__iter); + return 1; + } + return 0; + } + + template + requires(__is_transparent_v<_Compare> && !is_convertible_v<_Kp &&, iterator> && + !is_convertible_v<_Kp &&, const_iterator>) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(_Kp&& __x) { + auto [__first, __last] = equal_range(__x); + auto __res = __last - __first; + erase(__first, __last); + return __res; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __first, const_iterator __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_it = __keys_.erase(__first.__base(), __last.__base()); + __on_failure.__complete(); + return iterator(std::move(__key_it)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_set& __y) noexcept { + // warning: The spec has unconditional noexcept, which means that + // if any of the following functions throw an exception, + // std::terminate will be called. + // This is discussed in P2767, which hasn't been voted on yet. + ranges::swap(__compare_, __y.__compare_); + ranges::swap(__keys_, __y.__keys_); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() noexcept { __keys_.clear(); } + + // observers + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { return __compare_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { return __compare_; } + + // set operations + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __x) { + return __find_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __x) const { + return __find_impl(*this, __x); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Kp& __x) { + return __find_impl(*this, __x); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __x) const { + return contains(__x) ? 1 : 0; + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _Kp& __x) const { + return contains(__x) ? 
1 : 0; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __x) const { + return find(__x) != end(); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _Kp& __x) const { + return find(__x) != end(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __x) { + const auto& __keys = __keys_; + return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const key_type& __x) const { + return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _Kp& __x) { + const auto& __keys = __keys_; + return iterator(std::lower_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator lower_bound(const _Kp& __x) const { + return const_iterator(std::lower_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __x) { + const auto& __keys = __keys_; + return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const key_type& __x) const { + return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _Kp& __x) { + const auto& __keys = __keys_; + return iterator(std::upper_bound(__keys.begin(), __keys.end(), __x, __compare_)); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator upper_bound(const _Kp& __x) const { + return const_iterator(std::upper_bound(__keys_.begin(), __keys_.end(), __x, __compare_)); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const key_type& __x) { + return __equal_range_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __x) const { + return __equal_range_impl(*this, __x); + } + + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const _Kp& __x) { + return __equal_range_impl(*this, __x); + } + template + requires __is_transparent_v<_Compare> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _Kp& __x) const { + return __equal_range_impl(*this, __x); + } + + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator==(const flat_set& __x, const flat_set& __y) { + return ranges::equal(__x, __y); + } + + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 auto + operator<=>(const flat_set& __x, const flat_set& __y) { + return std::lexicographical_compare_three_way( + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + } + + friend _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(flat_set& __x, flat_set& __y) noexcept { + __x.swap(__y); + } + +private: + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_sorted_and_unique(auto&& __key_container) 
const { + auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); }; + return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container); + } + + // This function is only used in constructors. So there is not exception handling in this function. + // If the function exits via an exception, there will be no flat_set object constructed, thus, there + // is no invariant state to preserve + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __sort_and_unique() { + ranges::sort(__keys_, __compare_); + auto __dup_start = ranges::unique(__keys_, __key_equiv(__compare_)).begin(); + __keys_.erase(__dup_start, __keys_.end()); + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __append_sort_merge_unique(_Args&&... __args) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + size_type __old_size = size(); + __flat_set_utils::__append(*this, std::forward<_Args>(__args)...); + if (size() != __old_size) { + if constexpr (!_WasSorted) { + ranges::sort(__keys_.begin() + __old_size, __keys_.end(), __compare_); + } else { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted_and_unique(__keys_ | ranges::views::drop(__old_size)), + "Either the key container is not sorted or it contains duplicates"); + } + ranges::inplace_merge(__keys_.begin(), __keys_.begin() + __old_size, __keys_.end(), __compare_); + + auto __dup_start = ranges::unique(__keys_, __key_equiv(__compare_)).begin(); + __keys_.erase(__dup_start, __keys_.end()); + } + __on_failure.__complete(); + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __find_impl(_Self&& __self, const _Kp& __key) { + auto __it = __self.lower_bound(__key); + auto __last = __self.end(); + if (__it == __last || __self.__compare_(__key, *__it)) { + return __last; + } + return __it; + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { + using __iter = _If>, const_iterator, iterator>; + auto __it = std::lower_bound(__self.__keys_.begin(), __self.__keys_.end(), __key, __self.__compare_); + auto __last = __self.__keys_.end(); + if (__it == __last || __self.__compare_(__key, *__it)) { + return std::make_pair(__iter(__it), __iter(__it)); + } + return std::make_pair(__iter(__it), __iter(std::next(__it))); + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair __emplace(_Kp&& __key) { + auto __it = lower_bound(__key); + if (__it == end() || __compare_(__key, *__it)) { + return pair(__flat_set_utils::__emplace_exact_pos(*this, __it, std::forward<_Kp>(__key)), true); + } else { + return pair(std::move(__it), false); + } + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool __is_hint_correct(const_iterator __hint, _Kp&& __key) { + if (__hint != cbegin() && !__compare_(*std::prev(__hint), __key)) { + return false; + } + if (__hint != cend() && __compare_(*__hint, __key)) { + return false; + } + return true; + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator __emplace_hint(const_iterator __hint, _Kp&& __key) { + if (__is_hint_correct(__hint, __key)) { + if (__hint == cend() || __compare_(__key, *__hint)) { + return __flat_set_utils::__emplace_exact_pos(*this, __hint, std::forward<_Kp>(__key)); + } else { + // we already have an equal key + return __hint; + } + } else { + return __emplace(std::forward<_Kp>(__key)).first; + } + } + + 
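+  // Illustrative sketch of the hint handling in __emplace_hint above (assumes a flat_set<int>;
+  // the values are examples only):
+  //
+  //   std::flat_set<int> __s{1, 3, 5};
+  //   __s.insert(__s.lower_bound(4), 4); // correct hint: only the local comparisons in
+  //                                      // __is_hint_correct, no binary search
+  //   __s.insert(__s.begin(), 6);        // wrong hint: falls back to __emplace, which does a
+  //                                      // full lower_bound over the keys
+  //
+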
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __reserve(size_t __size) { + if constexpr (__container_traits<_KeyContainer>::__reservable) { + __keys_.reserve(__size); + } + } + + template + friend typename flat_set<_Key2, _Compare2, _KeyContainer2>::size_type _LIBCPP_CONSTEXPR_SINCE_CXX26 + erase_if(flat_set<_Key2, _Compare2, _KeyContainer2>&, _Predicate); + + _KeyContainer __keys_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_; + + struct __key_equiv { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator()(const_reference __x, const_reference __y) const { + return !__comp_(__x, __y) && !__comp_(__y, __x); + } + key_compare __comp_; + }; +}; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + is_invocable_v) +flat_set(_KeyContainer, _Compare = _Compare()) -> flat_set; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value) +flat_set(_KeyContainer, _Allocator) + -> flat_set, _KeyContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + uses_allocator_v<_KeyContainer, _Allocator> && + is_invocable_v) +flat_set(_KeyContainer, _Compare, _Allocator) -> flat_set; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + is_invocable_v) +flat_set(sorted_unique_t, _KeyContainer, _Compare = _Compare()) + -> flat_set; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && !__is_allocator<_KeyContainer>::value) +flat_set(sorted_unique_t, _KeyContainer, _Allocator) + -> flat_set, _KeyContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + uses_allocator_v<_KeyContainer, _Allocator> && + is_invocable_v) +flat_set(sorted_unique_t, _KeyContainer, _Compare, _Allocator) + -> flat_set; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_set(_InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_set<__iter_value_type<_InputIterator>, _Compare>; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_set(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_set<__iter_value_type<_InputIterator>, _Compare>; + +template >, + class _Allocator = allocator>, + class = __enable_if_t::value && __is_allocator<_Allocator>::value>> +flat_set(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_set< + ranges::range_value_t<_Range>, + _Compare, + vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; + +template ::value>> +flat_set(from_range_t, _Range&&, _Allocator) -> flat_set< + ranges::range_value_t<_Range>, + less>, + vector, __allocator_traits_rebind_t<_Allocator, ranges::range_value_t<_Range>>>>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_set(initializer_list<_Key>, _Compare = _Compare()) -> flat_set<_Key, _Compare>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_set(sorted_unique_t, initializer_list<_Key>, _Compare = _Compare()) -> flat_set<_Key, _Compare>; + +template +struct uses_allocator, _Allocator> + : bool_constant> {}; + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 typename flat_set<_Key, _Compare, 
_KeyContainer>::size_type +erase_if(flat_set<_Key, _Compare, _KeyContainer>& __flat_set, _Predicate __pred) { + auto __guard = std::__make_exception_guard([&] { __flat_set.clear(); }); + auto __it = std::remove_if(__flat_set.__keys_.begin(), __flat_set.__keys_.end(), [&](const auto& __e) -> bool { + return static_cast(__pred(__e)); + }); + auto __res = __flat_set.__keys_.end() - __it; + __flat_set.__keys_.erase(__it, __flat_set.__keys_.end()); + __guard.__complete(); + return __res; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FLAT_SET_FLAT_SET_H diff --git a/lib/libcxx/include/__flat_set/ra_iterator.h b/lib/libcxx/include/__flat_set/ra_iterator.h new file mode 100644 index 0000000000..6fac62895a --- /dev/null +++ b/lib/libcxx/include/__flat_set/ra_iterator.h @@ -0,0 +1,157 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_SET_RA_ITERATOR_H +#define _LIBCPP___FLAT_SET_RA_ITERATOR_H + +#include "__type_traits/is_same.h" +#include <__compare/three_way_comparable.h> +#include <__config> +#include <__iterator/incrementable_traits.h> +#include <__iterator/iterator_traits.h> +#include <__type_traits/is_constructible.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +/** + * __ra_iterator is a random access iterator that wraps an underlying iterator. + * It also stores the underlying container type in its type so that algorithms + * can optimize based on the underlying container type, and to avoid inadvertently + * mixing iterators coming from different containers.. 
+ */ +template +struct __ra_iterator { +private: + _Iterator __iter_; + + friend _Container; + + // note: checking the concept random_access_iterator does not work for incomplete types + static_assert(_IsSame::iterator_category, random_access_iterator_tag>::value, + "Underlying iterator must be a random access iterator"); + +public: + using iterator_concept = random_access_iterator_tag; // deliberately lower contiguous_iterator + using iterator_category = random_access_iterator_tag; + using value_type = iter_value_t<_Iterator>; + using difference_type = iter_difference_t<_Iterator>; + + _LIBCPP_HIDE_FROM_ABI __ra_iterator() + requires is_default_constructible_v<_Iterator> + = default; + + _LIBCPP_HIDE_FROM_ABI explicit constexpr __ra_iterator(_Iterator __iter) : __iter_(std::move(__iter)) {} + + _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __base() const noexcept(noexcept(_Iterator(__iter_))) { return __iter_; } + + _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator*() const { return *__iter_; } + _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator->() const + requires requires { __iter_.operator->(); } + { + return __iter_.operator->(); + } + + _LIBCPP_HIDE_FROM_ABI constexpr __ra_iterator& operator++() { + ++__iter_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __ra_iterator operator++(int) { + __ra_iterator __tmp(*this); + ++*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __ra_iterator& operator--() { + --__iter_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __ra_iterator operator--(int) { + __ra_iterator __tmp(*this); + --*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __ra_iterator& operator+=(difference_type __x) { + __iter_ += __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr __ra_iterator& operator-=(difference_type __x) { + __iter_ -= __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](difference_type __n) const { return *(*this + __n); } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __ra_iterator& __x, const __ra_iterator& __y) { + return __x.__iter_ == __y.__iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(const __ra_iterator& __x, const __ra_iterator& __y) { + return __x.__iter_ < __y.__iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(const __ra_iterator& __x, const __ra_iterator& __y) { + return __y < __x; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(const __ra_iterator& __x, const __ra_iterator& __y) { + return !(__y < __x); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(const __ra_iterator& __x, const __ra_iterator& __y) { + return !(__x < __y); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const __ra_iterator& __x, const __ra_iterator& __y) + requires three_way_comparable<_Iterator> + { + return __x.__iter_ <=> __y.__iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __ra_iterator operator+(const __ra_iterator& __i, difference_type __n) { + auto __tmp = __i; + __tmp += __n; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __ra_iterator operator+(difference_type __n, const __ra_iterator& __i) { + return __i + __n; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr __ra_iterator operator-(const __ra_iterator& __i, difference_type __n) { + auto __tmp = __i; + __tmp -= __n; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __ra_iterator& __x, const __ra_iterator& __y) { + return __x.__iter_ - __y.__iter_; + } 
+}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FLAT_SET_RA_ITERATOR_H diff --git a/lib/libcxx/include/__flat_set/utils.h b/lib/libcxx/include/__flat_set/utils.h new file mode 100644 index 0000000000..8a7f93a004 --- /dev/null +++ b/lib/libcxx/include/__flat_set/utils.h @@ -0,0 +1,82 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_SET_UTILS_H +#define _LIBCPP___FLAT_SET_UTILS_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__type_traits/container_traits.h> +#include <__type_traits/decay.h> +#include <__utility/exception_guard.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +// These utilities are defined in a class instead of a namespace so that this class can be befriended more easily. +struct __flat_set_utils { + // Emplace a key into a flat_{multi}set, at the exact position that + // __it point to, assuming that the key is not already present in the set. + // When an exception is thrown during the emplacement, the function will clear the set if the container does not + // have strong exception safety guarantee on emplacement. + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto + __emplace_exact_pos(_Set& __set, _Iter&& __iter, _KeyArg&& __key) { + using _KeyContainer = typename decay_t<_Set>::container_type; + auto __on_failure = std::__make_exception_guard([&]() noexcept { + if constexpr (!__container_traits<_KeyContainer>::__emplacement_has_strong_exception_safety_guarantee) { + __set.clear() /* noexcept */; + } + }); + auto __key_it = __set.__keys_.emplace(__iter.__base(), std::forward<_KeyArg>(__key)); + __on_failure.__complete(); + return typename decay_t<_Set>::iterator(std::move(__key_it)); + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static void + __append(_Set& __set, _InputIterator __first, _InputIterator __last) { + __set.__keys_.insert(__set.__keys_.end(), std::move(__first), std::move(__last)); + } + + template + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static void __append(_Set& __set, _Range&& __rng) { + if constexpr (requires { __set.__keys_.insert_range(__set.__keys_.end(), std::forward<_Range>(__rng)); }) { + // C++23 Sequence Container should have insert_range member function + // Note that not all Sequence Containers provide append_range. 
+ __set.__keys_.insert_range(__set.__keys_.end(), std::forward<_Range>(__rng)); + } else if constexpr (ranges::common_range<_Range> && + __has_input_iterator_category>::value) { + __set.__keys_.insert(__set.__keys_.end(), ranges::begin(__rng), ranges::end(__rng)); + } else { + for (auto&& __x : __rng) { + __set.__keys_.insert(__set.__keys_.end(), std::forward(__x)); + } + } + } +}; +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // #define _LIBCPP___FLAT_SET_UTILS_H diff --git a/lib/libcxx/include/__format/buffer.h b/lib/libcxx/include/__format/buffer.h index 0c054bbc3a..e7454f08f4 100644 --- a/lib/libcxx/include/__format/buffer.h +++ b/lib/libcxx/include/__format/buffer.h @@ -15,7 +15,6 @@ #include <__algorithm/max.h> #include <__algorithm/min.h> #include <__algorithm/ranges_copy.h> -#include <__algorithm/ranges_copy_n.h> #include <__algorithm/transform.h> #include <__algorithm/unwrap_iter.h> #include <__concepts/same_as.h> @@ -33,7 +32,7 @@ #include <__memory/allocator.h> #include <__memory/allocator_traits.h> #include <__memory/construct_at.h> -#include <__memory/ranges_construct_at.h> +#include <__memory/destroy.h> #include <__memory/uninitialized_algorithms.h> #include <__type_traits/add_pointer.h> #include <__type_traits/conditional.h> @@ -180,7 +179,7 @@ private: /// The latter option allows formatted_size to use the output buffer without /// ever writing anything to the buffer. template <__fmt_char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __output_buffer { +class __output_buffer { public: using value_type _LIBCPP_NODEBUG = _CharT; using __prepare_write_type _LIBCPP_NODEBUG = void (*)(__output_buffer<_CharT>&, size_t); @@ -340,18 +339,18 @@ concept __insertable = /// Extract the container type of a \ref back_insert_iterator. template -struct _LIBCPP_TEMPLATE_VIS __back_insert_iterator_container { +struct __back_insert_iterator_container { using type _LIBCPP_NODEBUG = void; }; template <__insertable _Container> -struct _LIBCPP_TEMPLATE_VIS __back_insert_iterator_container> { +struct __back_insert_iterator_container> { using type _LIBCPP_NODEBUG = _Container; }; // A dynamically growing buffer. template <__fmt_char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __allocating_buffer : public __output_buffer<_CharT> { +class __allocating_buffer : public __output_buffer<_CharT> { public: __allocating_buffer(const __allocating_buffer&) = delete; __allocating_buffer& operator=(const __allocating_buffer&) = delete; @@ -408,7 +407,7 @@ private: // A buffer that directly writes to the underlying buffer. template -class _LIBCPP_TEMPLATE_VIS __direct_iterator_buffer : public __output_buffer<_CharT> { +class __direct_iterator_buffer : public __output_buffer<_CharT> { public: [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __direct_iterator_buffer(_OutIt __out_it) : __direct_iterator_buffer{__out_it, nullptr} {} @@ -437,7 +436,7 @@ private: // A buffer that writes its output to the end of a container. template -class _LIBCPP_TEMPLATE_VIS __container_inserter_buffer : public __output_buffer<_CharT> { +class __container_inserter_buffer : public __output_buffer<_CharT> { public: [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __container_inserter_buffer(_OutIt __out_it) : __container_inserter_buffer{__out_it, nullptr} {} @@ -478,7 +477,7 @@ private: // Unlike the __container_inserter_buffer this class' performance does benefit // from allocating and then inserting. 
template -class _LIBCPP_TEMPLATE_VIS __iterator_buffer : public __allocating_buffer<_CharT> { +class __iterator_buffer : public __allocating_buffer<_CharT> { public: [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __iterator_buffer(_OutIt __out_it) : __allocating_buffer<_CharT>{}, __out_it_{std::move(__out_it)} {} @@ -496,7 +495,7 @@ private: // Selects the type of the buffer used for the output iterator. template -class _LIBCPP_TEMPLATE_VIS __buffer_selector { +class __buffer_selector { using _Container _LIBCPP_NODEBUG = __back_insert_iterator_container<_OutIt>::type; public: @@ -510,7 +509,7 @@ public: // A buffer that counts and limits the number of insertions. template -class _LIBCPP_TEMPLATE_VIS __format_to_n_buffer : private __buffer_selector<_OutIt, _CharT>::type { +class __format_to_n_buffer : private __buffer_selector<_OutIt, _CharT>::type { public: using _Base _LIBCPP_NODEBUG = __buffer_selector<_OutIt, _CharT>::type; @@ -534,7 +533,7 @@ private: // Since formatted_size only needs to know the size, the output itself is // discarded. template <__fmt_char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer : private __output_buffer<_CharT> { +class __formatted_size_buffer : private __output_buffer<_CharT> { public: using _Base _LIBCPP_NODEBUG = __output_buffer<_CharT>; @@ -577,7 +576,7 @@ private: // This class uses its own buffer management, since using vector // would lead to a circular include with formatter for vector. template <__fmt_char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __retarget_buffer { +class __retarget_buffer { using _Alloc _LIBCPP_NODEBUG = allocator<_CharT>; public: @@ -621,7 +620,7 @@ public: } _LIBCPP_HIDE_FROM_ABI ~__retarget_buffer() { - ranges::destroy_n(__ptr_, __size_); + std::destroy_n(__ptr_, __size_); allocator_traits<_Alloc>::deallocate(__alloc_, __ptr_, __capacity_); } @@ -686,7 +685,7 @@ private: // guard is optimized away so there is no runtime overhead. std::uninitialized_move_n(__ptr_, __size_, __result.ptr); __guard.__complete(); - ranges::destroy_n(__ptr_, __size_); + std::destroy_n(__ptr_, __size_); allocator_traits<_Alloc>::deallocate(__alloc_, __ptr_, __capacity_); __ptr_ = __result.ptr; diff --git a/lib/libcxx/include/__format/container_adaptor.h b/lib/libcxx/include/__format/container_adaptor.h index 48d42ee7d9..6a99a02861 100644 --- a/lib/libcxx/include/__format/container_adaptor.h +++ b/lib/libcxx/include/__format/container_adaptor.h @@ -35,7 +35,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // adaptor headers. To use the format functions users already include . 
template -struct _LIBCPP_TEMPLATE_VIS __formatter_container_adaptor { +struct __formatter_container_adaptor { private: using __maybe_const_container _LIBCPP_NODEBUG = __fmt_maybe_const; using __maybe_const_adaptor _LIBCPP_NODEBUG = __maybe_const, _Adaptor>; @@ -55,15 +55,15 @@ public: }; template _Container> -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> +struct formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; template -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> +struct formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; template _Container> -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> +struct formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; #endif // _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__format/escaped_output_table.h b/lib/libcxx/include/__format/escaped_output_table.h index 7a0b352398..1401b4637d 100644 --- a/lib/libcxx/include/__format/escaped_output_table.h +++ b/lib/libcxx/include/__format/escaped_output_table.h @@ -109,7 +109,7 @@ namespace __escaped_output_table { /// - bits [14, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. Note the code expects code units the fit /// into 18 bits, instead of the 21 bits needed for the full Unicode range. -_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[735] = { 0x00000020 /* 00000000 - 00000020 [ 33] */, 0x001fc021 /* 0000007f - 000000a0 [ 34] */, 0x002b4000 /* 000000ad - 000000ad [ 1] */, @@ -136,7 +136,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x02170001 /* 0000085c - 0000085d [ 2] */, 0x0217c000 /* 0000085f - 0000085f [ 1] */, 0x021ac004 /* 0000086b - 0000086f [ 5] */, - 0x0223c008 /* 0000088f - 00000897 [ 9] */, + 0x0223c007 /* 0000088f - 00000896 [ 8] */, 0x02388000 /* 000008e2 - 000008e2 [ 1] */, 0x02610000 /* 00000984 - 00000984 [ 1] */, 0x02634001 /* 0000098d - 0000098e [ 2] */, @@ -331,12 +331,11 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x06a68005 /* 00001a9a - 00001a9f [ 6] */, 0x06ab8001 /* 00001aae - 00001aaf [ 2] */, 0x06b3c030 /* 00001acf - 00001aff [ 49] */, - 0x06d34002 /* 00001b4d - 00001b4f [ 3] */, - 0x06dfc000 /* 00001b7f - 00001b7f [ 1] */, + 0x06d34000 /* 00001b4d - 00001b4d [ 1] */, 0x06fd0007 /* 00001bf4 - 00001bfb [ 8] */, 0x070e0002 /* 00001c38 - 00001c3a [ 3] */, 0x07128002 /* 00001c4a - 00001c4c [ 3] */, - 0x07224006 /* 00001c89 - 00001c8f [ 7] */, + 0x0722c004 /* 00001c8b - 00001c8f [ 5] */, 0x072ec001 /* 00001cbb - 00001cbc [ 2] */, 0x07320007 /* 00001cc8 - 00001ccf [ 8] */, 0x073ec004 /* 00001cfb - 00001cff [ 5] */, @@ -364,7 +363,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x0830400e /* 000020c1 - 000020cf [ 15] */, 0x083c400e /* 000020f1 - 000020ff [ 15] */, 0x08630003 /* 0000218c - 0000218f [ 4] */, - 0x0909c018 /* 00002427 - 0000243f [ 25] */, + 0x090a8015 /* 0000242a - 0000243f [ 22] */, 0x0912c014 /* 0000244b - 0000245f [ 21] */, 0x0add0001 /* 00002b74 - 00002b75 [ 2] */, 0x0ae58000 /* 00002b96 - 00002b96 [ 1] */, @@ -393,16 +392,16 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x0c400004 /* 00003100 - 00003104 [ 5] */, 0x0c4c0000 /* 00003130 - 00003130 [ 1] */, 0x0c63c000 /* 0000318f - 0000318f [ 1] */, - 0x0c79000a /* 000031e4 - 000031ee [ 11] */, + 0x0c798008 /* 000031e6 - 000031ee [ 9] */, 0x0c87c000 /* 0000321f - 0000321f [ 1] */, 0x29234002 /* 0000a48d - 0000a48f [ 3] */, 
0x2931c008 /* 0000a4c7 - 0000a4cf [ 9] */, 0x298b0013 /* 0000a62c - 0000a63f [ 20] */, 0x29be0007 /* 0000a6f8 - 0000a6ff [ 8] */, - 0x29f2c004 /* 0000a7cb - 0000a7cf [ 5] */, + 0x29f38001 /* 0000a7ce - 0000a7cf [ 2] */, 0x29f48000 /* 0000a7d2 - 0000a7d2 [ 1] */, 0x29f50000 /* 0000a7d4 - 0000a7d4 [ 1] */, - 0x29f68017 /* 0000a7da - 0000a7f1 [ 24] */, + 0x29f74014 /* 0000a7dd - 0000a7f1 [ 21] */, 0x2a0b4002 /* 0000a82d - 0000a82f [ 3] */, 0x2a0e8005 /* 0000a83a - 0000a83f [ 6] */, 0x2a1e0007 /* 0000a878 - 0000a87f [ 8] */, @@ -491,7 +490,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x41688000 /* 000105a2 - 000105a2 [ 1] */, 0x416c8000 /* 000105b2 - 000105b2 [ 1] */, 0x416e8000 /* 000105ba - 000105ba [ 1] */, - 0x416f4042 /* 000105bd - 000105ff [ 67] */, + 0x416f4002 /* 000105bd - 000105bf [ 3] */, + 0x417d000b /* 000105f4 - 000105ff [ 12] */, 0x41cdc008 /* 00010737 - 0001073f [ 9] */, 0x41d58009 /* 00010756 - 0001075f [ 10] */, 0x41da0017 /* 00010768 - 0001077f [ 24] */, @@ -534,11 +534,15 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x432cc00c /* 00010cb3 - 00010cbf [ 13] */, 0x433cc006 /* 00010cf3 - 00010cf9 [ 7] */, 0x434a0007 /* 00010d28 - 00010d2f [ 8] */, - 0x434e8125 /* 00010d3a - 00010e5f [ 294] */, + 0x434e8005 /* 00010d3a - 00010d3f [ 6] */, + 0x43598002 /* 00010d66 - 00010d68 [ 3] */, + 0x43618007 /* 00010d86 - 00010d8d [ 8] */, + 0x436400cf /* 00010d90 - 00010e5f [ 208] */, 0x439fc000 /* 00010e7f - 00010e7f [ 1] */, 0x43aa8000 /* 00010eaa - 00010eaa [ 1] */, 0x43ab8001 /* 00010eae - 00010eaf [ 2] */, - 0x43ac804a /* 00010eb2 - 00010efc [ 75] */, + 0x43ac800f /* 00010eb2 - 00010ec1 [ 16] */, + 0x43b14036 /* 00010ec5 - 00010efb [ 55] */, 0x43ca0007 /* 00010f28 - 00010f2f [ 8] */, 0x43d68015 /* 00010f5a - 00010f6f [ 22] */, 0x43e28025 /* 00010f8a - 00010faf [ 38] */, @@ -578,7 +582,18 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x44d60004 /* 00011358 - 0001135c [ 5] */, 0x44d90001 /* 00011364 - 00011365 [ 2] */, 0x44db4002 /* 0001136d - 0001136f [ 3] */, - 0x44dd408a /* 00011375 - 000113ff [ 139] */, + 0x44dd400a /* 00011375 - 0001137f [ 11] */, + 0x44e28000 /* 0001138a - 0001138a [ 1] */, + 0x44e30001 /* 0001138c - 0001138d [ 2] */, + 0x44e3c000 /* 0001138f - 0001138f [ 1] */, + 0x44ed8000 /* 000113b6 - 000113b6 [ 1] */, + 0x44f04000 /* 000113c1 - 000113c1 [ 1] */, + 0x44f0c001 /* 000113c3 - 000113c4 [ 2] */, + 0x44f18000 /* 000113c6 - 000113c6 [ 1] */, + 0x44f2c000 /* 000113cb - 000113cb [ 1] */, + 0x44f58000 /* 000113d6 - 000113d6 [ 1] */, + 0x44f64007 /* 000113d9 - 000113e0 [ 8] */, + 0x44f8c01c /* 000113e3 - 000113ff [ 29] */, 0x45170000 /* 0001145c - 0001145c [ 1] */, 0x4518801d /* 00011462 - 0001147f [ 30] */, 0x45320007 /* 000114c8 - 000114cf [ 8] */, @@ -589,7 +604,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x45968005 /* 0001165a - 0001165f [ 6] */, 0x459b4012 /* 0001166d - 0001167f [ 19] */, 0x45ae8005 /* 000116ba - 000116bf [ 6] */, - 0x45b28035 /* 000116ca - 000116ff [ 54] */, + 0x45b28005 /* 000116ca - 000116cf [ 6] */, + 0x45b9001b /* 000116e4 - 000116ff [ 28] */, 0x45c6c001 /* 0001171b - 0001171c [ 2] */, 0x45cb0003 /* 0001172c - 0001172f [ 4] */, 0x45d1c0b8 /* 00011747 - 000117ff [ 185] */, @@ -609,7 +625,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x46920007 /* 00011a48 - 00011a4f [ 8] */, 0x46a8c00c /* 00011aa3 - 00011aaf [ 13] */, 0x46be4006 /* 00011af9 - 00011aff [ 7] */, - 0x46c280f5 /* 00011b0a - 00011bff [ 246] */, + 0x46c280b5 
/* 00011b0a - 00011bbf [ 182] */, + 0x46f8800d /* 00011be2 - 00011bef [ 14] */, + 0x46fe8005 /* 00011bfa - 00011bff [ 6] */, 0x47024000 /* 00011c09 - 00011c09 [ 1] */, 0x470dc000 /* 00011c37 - 00011c37 [ 1] */, 0x47118009 /* 00011c46 - 00011c4f [ 10] */, @@ -633,7 +651,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x47be4006 /* 00011ef9 - 00011eff [ 7] */, 0x47c44000 /* 00011f11 - 00011f11 [ 1] */, 0x47cec002 /* 00011f3b - 00011f3d [ 3] */, - 0x47d68055 /* 00011f5a - 00011faf [ 86] */, + 0x47d6c054 /* 00011f5b - 00011faf [ 85] */, 0x47ec400e /* 00011fb1 - 00011fbf [ 15] */, 0x47fc800c /* 00011ff2 - 00011ffe [ 13] */, 0x48e68065 /* 0001239a - 000123ff [ 102] */, @@ -642,8 +660,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x49510a4b /* 00012544 - 00012f8f [ 2636] */, 0x4bfcc00c /* 00012ff3 - 00012fff [ 13] */, 0x4d0c000f /* 00013430 - 0001343f [ 16] */, - 0x4d158fa9 /* 00013456 - 000143ff [ 4010] */, - 0x5191e1b8 /* 00014647 - 000167ff [ 8633] */, + 0x4d158009 /* 00013456 - 0001345f [ 10] */, + 0x50fec004 /* 000143fb - 000143ff [ 5] */, + 0x5191dab8 /* 00014647 - 000160ff [ 6841] */, + 0x584e86c5 /* 0001613a - 000167ff [ 1734] */, 0x5a8e4006 /* 00016a39 - 00016a3f [ 7] */, 0x5a97c000 /* 00016a5f - 00016a5f [ 1] */, 0x5a9a8003 /* 00016a6a - 00016a6d [ 4] */, @@ -655,7 +675,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x5ad68000 /* 00016b5a - 00016b5a [ 1] */, 0x5ad88000 /* 00016b62 - 00016b62 [ 1] */, 0x5ade0004 /* 00016b78 - 00016b7c [ 5] */, - 0x5ae402af /* 00016b90 - 00016e3f [ 688] */, + 0x5ae401af /* 00016b90 - 00016d3f [ 432] */, + 0x5b5e80c5 /* 00016d7a - 00016e3f [ 198] */, 0x5ba6c064 /* 00016e9b - 00016eff [ 101] */, 0x5bd2c003 /* 00016f4b - 00016f4e [ 4] */, 0x5be20006 /* 00016f88 - 00016f8e [ 7] */, @@ -663,7 +684,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x5bf9400a /* 00016fe5 - 00016fef [ 11] */, 0x5bfc800d /* 00016ff2 - 00016fff [ 14] */, 0x61fe0007 /* 000187f8 - 000187ff [ 8] */, - 0x63358029 /* 00018cd6 - 00018cff [ 42] */, + 0x63358028 /* 00018cd6 - 00018cfe [ 41] */, 0x634262e6 /* 00018d09 - 0001afef [ 8935] */, 0x6bfd0000 /* 0001aff4 - 0001aff4 [ 1] */, 0x6bff0000 /* 0001affc - 0001affc [ 1] */, @@ -678,7 +699,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x6f1f4002 /* 0001bc7d - 0001bc7f [ 3] */, 0x6f224006 /* 0001bc89 - 0001bc8f [ 7] */, 0x6f268001 /* 0001bc9a - 0001bc9b [ 2] */, - 0x6f28125f /* 0001bca0 - 0001ceff [ 4704] */, + 0x6f280f5f /* 0001bca0 - 0001cbff [ 3936] */, + 0x733e8005 /* 0001ccfa - 0001ccff [ 6] */, + 0x73ad004b /* 0001ceb4 - 0001ceff [ 76] */, 0x73cb8001 /* 0001cf2e - 0001cf2f [ 2] */, 0x73d1c008 /* 0001cf47 - 0001cf4f [ 9] */, 0x73f1003b /* 0001cfc4 - 0001cfff [ 60] */, @@ -730,7 +753,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x78abc010 /* 0001e2af - 0001e2bf [ 17] */, 0x78be8004 /* 0001e2fa - 0001e2fe [ 5] */, 0x78c001cf /* 0001e300 - 0001e4cf [ 464] */, - 0x793e82e5 /* 0001e4fa - 0001e7df [ 742] */, + 0x793e80d5 /* 0001e4fa - 0001e5cf [ 214] */, + 0x797ec003 /* 0001e5fb - 0001e5fe [ 4] */, + 0x798001df /* 0001e600 - 0001e7df [ 480] */, 0x79f9c000 /* 0001e7e7 - 0001e7e7 [ 1] */, 0x79fb0000 /* 0001e7ec - 0001e7ec [ 1] */, 0x79fbc000 /* 0001e7ef - 0001e7ef [ 1] */, @@ -800,18 +825,17 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { 0x7e168005 /* 0001f85a - 0001f85f [ 6] */, 0x7e220007 /* 0001f888 - 0001f88f [ 8] */, 0x7e2b8001 /* 0001f8ae - 0001f8af [ 2] */, - 
0x7e2c804d /* 0001f8b2 - 0001f8ff [ 78] */, + 0x7e2f0003 /* 0001f8bc - 0001f8bf [ 4] */, + 0x7e30803d /* 0001f8c2 - 0001f8ff [ 62] */, 0x7e95000b /* 0001fa54 - 0001fa5f [ 12] */, 0x7e9b8001 /* 0001fa6e - 0001fa6f [ 2] */, 0x7e9f4002 /* 0001fa7d - 0001fa7f [ 3] */, - 0x7ea24006 /* 0001fa89 - 0001fa8f [ 7] */, - 0x7eaf8000 /* 0001fabe - 0001fabe [ 1] */, - 0x7eb18007 /* 0001fac6 - 0001facd [ 8] */, - 0x7eb70003 /* 0001fadc - 0001fadf [ 4] */, - 0x7eba4006 /* 0001fae9 - 0001faef [ 7] */, + 0x7ea28004 /* 0001fa8a - 0001fa8e [ 5] */, + 0x7eb1c006 /* 0001fac7 - 0001facd [ 7] */, + 0x7eb74001 /* 0001fadd - 0001fade [ 2] */, + 0x7eba8005 /* 0001faea - 0001faef [ 6] */, 0x7ebe4006 /* 0001faf9 - 0001faff [ 7] */, 0x7ee4c000 /* 0001fb93 - 0001fb93 [ 1] */, - 0x7ef2c024 /* 0001fbcb - 0001fbef [ 37] */, 0x7efe8405 /* 0001fbfa - 0001ffff [ 1030] */, 0xa9b8001f /* 0002a6e0 - 0002a6ff [ 32] */, 0xadce8005 /* 0002b73a - 0002b73f [ 6] */, diff --git a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h index 7653a9e03b..f76e018df7 100644 --- a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h +++ b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h @@ -125,7 +125,7 @@ enum class __property : uint8_t { /// following benchmark. /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp // clang-format off -_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1501] = { 0x00000091, 0x00005005, 0x00005811, @@ -164,7 +164,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x00414842, 0x0042c822, 0x00448018, - 0x0044c072, + 0x0044b882, 0x00465172, 0x00471008, 0x004719f2, @@ -246,14 +246,12 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x0064101a, 0x0065e002, 0x0065f00a, - 0x0065f802, - 0x0066001a, + 0x0065f812, + 0x0066080a, 0x00661002, 0x0066181a, - 0x00663002, - 0x0066381a, - 0x0066501a, - 0x00666012, + 0x00663022, + 0x00665032, 0x0066a812, 0x00671012, 0x0067980a, @@ -318,10 +316,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x008b047c, 0x008d457b, 0x009ae822, - 0x00b89022, - 0x00b8a80a, - 0x00b99012, - 0x00b9a00a, + 0x00b89032, + 0x00b99022, 0x00ba9012, 0x00bb9012, 0x00bda012, @@ -361,29 +357,23 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x00d581e2, 0x00d80032, 0x00d8200a, - 0x00d9a062, - 0x00d9d80a, - 0x00d9e002, - 0x00d9e84a, - 0x00da1002, - 0x00da181a, + 0x00d9a092, + 0x00d9f03a, + 0x00da1022, 0x00db5882, 0x00dc0012, 0x00dc100a, 0x00dd080a, 0x00dd1032, 0x00dd301a, - 0x00dd4012, - 0x00dd500a, - 0x00dd5822, + 0x00dd4052, 0x00df3002, 0x00df380a, 0x00df4012, 0x00df502a, 0x00df6802, 0x00df700a, - 0x00df7822, - 0x00df901a, + 0x00df7842, 0x00e1207a, 0x00e16072, 0x00e1a01a, @@ -475,7 +465,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x0547f802, 0x05493072, 0x054a38a2, - 0x054a901a, + 0x054a900a, + 0x054a9802, 0x054b01c4, 0x054c0022, 0x054c180a, @@ -484,7 +475,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x054db032, 0x054dd01a, 0x054de012, - 0x054df02a, + 0x054df01a, + 0x054e0002, 0x054f2802, 0x05514852, 0x0551781a, @@ -1328,8 +1320,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x0851f802, 0x08572812, 0x08692032, + 0x086b4842, 0x08755812, - 0x0877e822, + 0x0877e032, 0x087a30a2, 0x087c1032, 0x0880000a, @@ -1357,7 +1350,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr 
uint32_t __entries[1496] = { 0x088c100a, 0x088d982a, 0x088db082, - 0x088df81a, + 0x088df80a, + 0x088e0002, 0x088e1018, 0x088e4832, 0x088e700a, @@ -1365,9 +1359,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x0891602a, 0x08917822, 0x0891901a, - 0x0891a002, - 0x0891a80a, - 0x0891b012, + 0x0891a032, 0x0891f002, 0x08920802, 0x0896f802, @@ -1381,11 +1373,24 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x089a0002, 0x089a083a, 0x089a381a, - 0x089a582a, + 0x089a581a, + 0x089a6802, 0x089ab802, 0x089b101a, 0x089b3062, 0x089b8042, + 0x089dc002, + 0x089dc81a, + 0x089dd852, + 0x089e1002, + 0x089e2802, + 0x089e3822, + 0x089e500a, + 0x089e601a, + 0x089e7022, + 0x089e8808, + 0x089e9002, + 0x089f0812, 0x08a1a82a, 0x08a1c072, 0x08a2001a, @@ -1422,10 +1427,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x08b5600a, 0x08b56802, 0x08b5701a, - 0x08b58052, - 0x08b5b00a, - 0x08b5b802, - 0x08b8e822, + 0x08b58072, + 0x08b8e802, + 0x08b8f00a, + 0x08b8f802, 0x08b91032, 0x08b9300a, 0x08b93842, @@ -1436,9 +1441,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x08c98002, 0x08c9884a, 0x08c9b81a, - 0x08c9d812, - 0x08c9e80a, - 0x08c9f002, + 0x08c9d832, 0x08c9f808, 0x08ca000a, 0x08ca0808, @@ -1495,28 +1498,29 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x08f9a01a, 0x08f9b042, 0x08f9f01a, - 0x08fa0002, - 0x08fa080a, - 0x08fa1002, + 0x08fa0022, + 0x08fad002, 0x09a180f1, 0x09a20002, 0x09a238e2, + 0x0b08f0b2, + 0x0b09502a, + 0x0b096822, 0x0b578042, 0x0b598062, + 0x0b6b180c, + 0x0b6b383c, 0x0b7a7802, 0x0b7a8b6a, 0x0b7c7832, 0x0b7f2002, - 0x0b7f801a, + 0x0b7f8012, 0x0de4e812, 0x0de50031, 0x0e7802d2, 0x0e798162, - 0x0e8b2802, - 0x0e8b300a, - 0x0e8b3822, - 0x0e8b680a, - 0x0e8b7042, + 0x0e8b2842, + 0x0e8b6852, 0x0e8b9871, 0x0e8bd872, 0x0e8c2862, @@ -1538,6 +1542,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { 0x0f157002, 0x0f176032, 0x0f276032, + 0x0f2f7012, 0x0f468062, 0x0f4a2062, 0x0f8007f3, diff --git a/lib/libcxx/include/__format/format_arg.h b/lib/libcxx/include/__format/format_arg.h index 10f0ba9928..ed5e76275e 100644 --- a/lib/libcxx/include/__format/format_arg.h +++ b/lib/libcxx/include/__format/format_arg.h @@ -277,9 +277,9 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_NO_SPECIALIZATIONS basic_format_arg { +class _LIBCPP_NO_SPECIALIZATIONS basic_format_arg { public: - class _LIBCPP_TEMPLATE_VIS handle; + class handle; _LIBCPP_HIDE_FROM_ABI basic_format_arg() noexcept : __type_{__format::__arg_t::__none} {} @@ -355,7 +355,7 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS basic_format_arg<_Context>::handle { +class basic_format_arg<_Context>::handle { public: _LIBCPP_HIDE_FROM_ABI void format(basic_format_parse_context& __parse_ctx, _Context& __ctx) const { __handle_.__format_(__parse_ctx, __ctx, __handle_.__ptr_); diff --git a/lib/libcxx/include/__format/format_arg_store.h b/lib/libcxx/include/__format/format_arg_store.h index 4c5ee9e9e4..fbb4cad21b 100644 --- a/lib/libcxx/include/__format/format_arg_store.h +++ b/lib/libcxx/include/__format/format_arg_store.h @@ -14,13 +14,14 @@ # pragma GCC system_header #endif -#include <__concepts/arithmetic.h> #include <__concepts/same_as.h> #include <__config> +#include <__cstddef/size_t.h> #include <__format/concepts.h> #include <__format/format_arg.h> #include <__type_traits/conditional.h> #include <__type_traits/extent.h> +#include <__type_traits/integer_traits.h> #include <__type_traits/remove_const.h> #include 
#include @@ -32,6 +33,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format { +template +inline constexpr bool __is_bounded_array_of = false; + +template +inline constexpr bool __is_bounded_array_of<_Elem[_Len], _Elem> = true; + /// \returns The @c __arg_t based on the type of the formatting argument. /// /// \pre \c __formattable<_Tp, typename _Context::char_type> @@ -58,7 +65,7 @@ consteval __arg_t __determine_arg_t() { # endif // Signed integers -template +template consteval __arg_t __determine_arg_t() { if constexpr (sizeof(_Tp) <= sizeof(int)) return __arg_t::__int; @@ -73,7 +80,7 @@ consteval __arg_t __determine_arg_t() { } // Unsigned integers -template +template consteval __arg_t __determine_arg_t() { if constexpr (sizeof(_Tp) <= sizeof(unsigned)) return __arg_t::__unsigned; @@ -110,7 +117,7 @@ consteval __arg_t __determine_arg_t() { // Char array template - requires(is_array_v<_Tp> && same_as<_Tp, typename _Context::char_type[extent_v<_Tp>]>) + requires __is_bounded_array_of<_Tp, typename _Context::char_type> consteval __arg_t __determine_arg_t() { return __arg_t::__string_view; } @@ -164,17 +171,18 @@ consteval __arg_t __determine_arg_t() { template _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __value) noexcept { using _Dp = remove_const_t<_Tp>; - constexpr __arg_t __arg = __determine_arg_t<_Context, _Dp>(); + constexpr __arg_t __arg = __format::__determine_arg_t<_Context, _Dp>(); static_assert(__arg != __arg_t::__none, "the supplied type is not formattable"); static_assert(__formattable_with<_Tp, _Context>); + using __context_char_type = _Context::char_type; // Not all types can be used to directly initialize the // __basic_format_arg_value. First handle all types needing adjustment, the // final else requires no adjustment. if constexpr (__arg == __arg_t::__char_type) # if _LIBCPP_HAS_WIDE_CHARACTERS - if constexpr (same_as && same_as<_Dp, char>) + if constexpr (same_as<__context_char_type, wchar_t> && same_as<_Dp, char>) return basic_format_arg<_Context>{__arg, static_cast(static_cast(__value))}; else # endif @@ -189,14 +197,16 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu return basic_format_arg<_Context>{__arg, static_cast(__value)}; else if constexpr (__arg == __arg_t::__string_view) // Using std::size on a character array will add the NUL-terminator to the size. - if constexpr (is_array_v<_Dp>) + if constexpr (__is_bounded_array_of<_Dp, __context_char_type>) { + const __context_char_type* const __pbegin = std::begin(__value); + const __context_char_type* const __pzero = + char_traits<__context_char_type>::find(__pbegin, extent_v<_Dp>, __context_char_type{}); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array"); return basic_format_arg<_Context>{ - __arg, basic_string_view{__value, extent_v<_Dp> - 1}}; - else - // When the _Traits or _Allocator are different an implicit conversion will - // fail. - return basic_format_arg<_Context>{ - __arg, basic_string_view{__value.data(), __value.size()}}; + __arg, basic_string_view<__context_char_type>{__pbegin, static_cast(__pzero - __pbegin)}}; + } else + // When the _Traits or _Allocator are different an implicit conversion will fail. 
+ return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value.data(), __value.size()}}; else if constexpr (__arg == __arg_t::__ptr) return basic_format_arg<_Context>{__arg, static_cast(__value)}; else if constexpr (__arg == __arg_t::__handle) @@ -247,7 +257,7 @@ struct __unpacked_format_arg_store { } // namespace __format template -struct _LIBCPP_TEMPLATE_VIS __format_arg_store { +struct __format_arg_store { _LIBCPP_HIDE_FROM_ABI __format_arg_store(_Args&... __args) noexcept { if constexpr (sizeof...(_Args) != 0) { if constexpr (__format::__use_packed_format_arg_store(sizeof...(_Args))) diff --git a/lib/libcxx/include/__format/format_args.h b/lib/libcxx/include/__format/format_args.h index b98663c06e..9dd7a5ed9c 100644 --- a/lib/libcxx/include/__format/format_args.h +++ b/lib/libcxx/include/__format/format_args.h @@ -26,7 +26,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS basic_format_args { +class basic_format_args { public: template _LIBCPP_HIDE_FROM_ABI basic_format_args(const __format_arg_store<_Context, _Args...>& __store) noexcept diff --git a/lib/libcxx/include/__format/format_context.h b/lib/libcxx/include/__format/format_context.h index 4dbfdbc02a..e672ee7ad0 100644 --- a/lib/libcxx/include/__format/format_context.h +++ b/lib/libcxx/include/__format/format_context.h @@ -42,7 +42,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template requires output_iterator<_OutIt, const _CharT&> -class _LIBCPP_TEMPLATE_VIS basic_format_context; +class basic_format_context; # if _LIBCPP_HAS_LOCALIZATION /** @@ -72,13 +72,8 @@ using wformat_context = basic_format_context< back_insert_iterator<__format::__o template requires output_iterator<_OutIt, const _CharT&> -class - // clang-format off - _LIBCPP_TEMPLATE_VIS - _LIBCPP_PREFERRED_NAME(format_context) - _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wformat_context)) - // clang-format on - basic_format_context { +class _LIBCPP_PREFERRED_NAME(format_context) + _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wformat_context)) basic_format_context { public: using iterator = _OutIt; using char_type = _CharT; @@ -153,7 +148,7 @@ public: // Here the width of an element in input is determined dynamically. // Note when the top-level element has no width the retargeting is not needed. template -class _LIBCPP_TEMPLATE_VIS basic_format_context::__iterator, _CharT> { +class basic_format_context::__iterator, _CharT> { public: using iterator = typename __format::__retarget_buffer<_CharT>::__iterator; using char_type = _CharT; diff --git a/lib/libcxx/include/__format/format_functions.h b/lib/libcxx/include/__format/format_functions.h index 5feaf7e5a0..873265bc17 100644 --- a/lib/libcxx/include/__format/format_functions.h +++ b/lib/libcxx/include/__format/format_functions.h @@ -11,6 +11,8 @@ #define _LIBCPP___FORMAT_FORMAT_FUNCTIONS #include <__algorithm/clamp.h> +#include <__algorithm/ranges_find_first_of.h> +#include <__chrono/statically_widen.h> #include <__concepts/convertible_to.h> #include <__concepts/same_as.h> #include <__config> @@ -36,6 +38,7 @@ #include <__iterator/iterator_traits.h> // iter_value_t #include <__variant/monostate.h> #include +#include #include #include @@ -83,7 +86,7 @@ namespace __format { /// When parsing a handle which is not enabled the code is ill-formed. /// This helper uses the parser of the appropriate formatter for the stored type. 
template -class _LIBCPP_TEMPLATE_VIS __compile_time_handle { +class __compile_time_handle { public: template _LIBCPP_HIDE_FROM_ABI constexpr void __parse(_ParseContext& __ctx) const { @@ -110,7 +113,7 @@ private: // Dummy format_context only providing the parts used during constant // validation of the basic_format_string. template -struct _LIBCPP_TEMPLATE_VIS __compile_time_basic_format_context { +struct __compile_time_basic_format_context { public: using char_type = _CharT; @@ -339,12 +342,12 @@ _LIBCPP_HIDE_FROM_ABI constexpr typename _Ctx::iterator __vformat_to(_ParseCtx&& # if _LIBCPP_STD_VER >= 26 template -struct _LIBCPP_TEMPLATE_VIS __runtime_format_string { +struct __runtime_format_string { private: basic_string_view<_CharT> __str_; template - friend struct _LIBCPP_TEMPLATE_VIS basic_format_string; + friend struct basic_format_string; public: _LIBCPP_HIDE_FROM_ABI __runtime_format_string(basic_string_view<_CharT> __s) noexcept : __str_(__s) {} @@ -362,7 +365,7 @@ _LIBCPP_HIDE_FROM_ABI inline __runtime_format_string runtime_format(wst # endif // _LIBCPP_STD_VER >= 26 template -struct _LIBCPP_TEMPLATE_VIS basic_format_string { +struct basic_format_string { template requires convertible_to> consteval basic_format_string(const _Tp& __str) : __str_{__str} { @@ -447,10 +450,47 @@ format_to(_OutIt __out_it, wformat_string<_Args...> __fmt, _Args&&... __args) { } # endif +// Try constant folding the format string instead of going through the whole formatting machinery. If there is no +// constant folding no extra code should be emitted (with optimizations enabled) and the function returns nullopt. When +// constant folding is successful, the formatting is performed and the resulting string is returned. +namespace __format { +template +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional> __try_constant_folding( + basic_string_view<_CharT> __fmt, + basic_format_args>, _CharT>> __args) { + // Fold strings not containing '{' or '}' to just return the string + if (bool __is_identity = [&] [[__gnu__::__pure__]] // Make sure the compiler knows this call can be eliminated + { return std::ranges::find_first_of(__fmt, array{'{', '}'}) == __fmt.end(); }(); + __builtin_constant_p(__is_identity) && __is_identity) + return basic_string<_CharT>{__fmt}; + + // Fold '{}' to the appropriate conversion function + if (auto __only_first_arg = __fmt == _LIBCPP_STATICALLY_WIDEN(_CharT, "{}"); + __builtin_constant_p(__only_first_arg) && __only_first_arg) { + if (auto __arg = __args.get(0); __builtin_constant_p(__arg.__type_)) { + return std::__visit_format_arg( + [](_Tp&& __argument) -> optional> { + if constexpr (is_same_v, basic_string_view<_CharT>>) { + return basic_string<_CharT>{__argument}; + } else { + return nullopt; + } + }, + __arg); + } + } + + return nullopt; +} +} // namespace __format + // TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup // fires too eagerly, see http://llvm.org/PR61563. 
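The __try_constant_folding helper above is only allowed to fire when its result is identical to what the full formatting machinery would produce. In user-visible terms, the two folded cases are the ones below; this is ordinary application code assuming a standard library that provides <format>, not libc++ internals:

#include <cassert>
#include <format>
#include <string>
#include <string_view>

int main() {
  // Case 1: the format string contains no '{' or '}', so the result is the string itself.
  assert(std::format("plain text") == "plain text");

  // Case 2: a lone "{}" whose argument is a string; the result is just that argument.
  std::string_view name = "libc++";
  assert(std::format("{}", name) == std::string{name});
}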
template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(string_view __fmt, format_args __args) { + auto __result = __format::__try_constant_folding(__fmt, __args); + if (__result.has_value()) + return *std::move(__result); __format::__allocating_buffer __buffer; std::vformat_to(__buffer.__make_output_iterator(), __fmt, __args); return string{__buffer.__view()}; @@ -462,6 +502,9 @@ template template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(wstring_view __fmt, wformat_args __args) { + auto __result = __format::__try_constant_folding(__fmt, __args); + if (__result.has_value()) + return *std::move(__result); __format::__allocating_buffer __buffer; std::vformat_to(__buffer.__make_output_iterator(), __fmt, __args); return wstring{__buffer.__view()}; diff --git a/lib/libcxx/include/__format/format_parse_context.h b/lib/libcxx/include/__format/format_parse_context.h index 459db751c9..67b90c7b7e 100644 --- a/lib/libcxx/include/__format/format_parse_context.h +++ b/lib/libcxx/include/__format/format_parse_context.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS basic_format_parse_context { +class basic_format_parse_context { public: using char_type = _CharT; using const_iterator = typename basic_string_view<_CharT>::const_iterator; diff --git a/lib/libcxx/include/__format/format_string.h b/lib/libcxx/include/__format/format_string.h index 5db5973dd5..b60ed4db38 100644 --- a/lib/libcxx/include/__format/format_string.h +++ b/lib/libcxx/include/__format/format_string.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format { template -struct _LIBCPP_TEMPLATE_VIS __parse_number_result { +struct __parse_number_result { _Iterator __last; uint32_t __value; }; diff --git a/lib/libcxx/include/__format/format_to_n_result.h b/lib/libcxx/include/__format/format_to_n_result.h index 344299e32f..0c807be732 100644 --- a/lib/libcxx/include/__format/format_to_n_result.h +++ b/lib/libcxx/include/__format/format_to_n_result.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -struct _LIBCPP_TEMPLATE_VIS format_to_n_result { +struct format_to_n_result { _OutIt out; iter_difference_t<_OutIt> size; }; diff --git a/lib/libcxx/include/__format/formatter.h b/lib/libcxx/include/__format/formatter.h index 39c2670dd8..cbbd9d4f7c 100644 --- a/lib/libcxx/include/__format/formatter.h +++ b/lib/libcxx/include/__format/formatter.h @@ -21,6 +21,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 +struct __disabled_formatter { + __disabled_formatter() = delete; + __disabled_formatter(const __disabled_formatter&) = delete; + __disabled_formatter& operator=(const __disabled_formatter&) = delete; +}; + /// The default formatter template. /// /// [format.formatter.spec]/5 @@ -28,14 +34,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD /// - is_default_constructible_v, /// - is_copy_constructible_v, /// - is_move_constructible_v, -/// - is_copy_assignable, and -/// - is_move_assignable. +/// - is_copy_assignable_v, and +/// - is_move_assignable_v. 
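The requirements listed above describe when a formatter specialization is enabled; the hunk that follows expresses the disabled primary template through __disabled_formatter. For contrast, a minimal enabled specialization for a hypothetical user type might look like this (ordinary user code; the type point and its formatter are illustrative, not part of this patch):

#include <format>
#include <string>

struct point { int x, y; };   // hypothetical user type

template <>
struct std::formatter<point, char> {
  // Accept only the empty format-spec "{}" for brevity.
  constexpr auto parse(std::format_parse_context& ctx) { return ctx.begin(); }

  template <class FormatContext>
  auto format(const point& p, FormatContext& ctx) const {
    return std::format_to(ctx.out(), "({}, {})", p.x, p.y);
  }
};

int main() {
  std::string s = std::format("{}", point{1, 2});   // "(1, 2)"
  return s.size() == 6 ? 0 : 1;
}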
template -struct _LIBCPP_TEMPLATE_VIS formatter { - formatter() = delete; - formatter(const formatter&) = delete; - formatter& operator=(const formatter&) = delete; -}; +struct formatter : __disabled_formatter {}; # if _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__format/formatter_bool.h b/lib/libcxx/include/__format/formatter_bool.h index d08acd4744..33a148a546 100644 --- a/lib/libcxx/include/__format/formatter_bool.h +++ b/lib/libcxx/include/__format/formatter_bool.h @@ -33,7 +33,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter { +struct formatter { public: template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { diff --git a/lib/libcxx/include/__format/formatter_char.h b/lib/libcxx/include/__format/formatter_char.h index 8b8fd2d42c..40eb5a02a7 100644 --- a/lib/libcxx/include/__format/formatter_char.h +++ b/lib/libcxx/include/__format/formatter_char.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS __formatter_char { +struct __formatter_char { public: template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -75,14 +75,14 @@ public: }; template <> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; +struct formatter : public __formatter_char {}; # if _LIBCPP_HAS_WIDE_CHARACTERS template <> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; +struct formatter : public __formatter_char {}; template <> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; +struct formatter : public __formatter_char {}; # endif // _LIBCPP_HAS_WIDE_CHARACTERS # if _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__format/formatter_floating_point.h b/lib/libcxx/include/__format/formatter_floating_point.h index ac4be9b619..b200bc58ba 100644 --- a/lib/libcxx/include/__format/formatter_floating_point.h +++ b/lib/libcxx/include/__format/formatter_floating_point.h @@ -19,6 +19,7 @@ #include <__assert> #include <__charconv/chars_format.h> #include <__charconv/to_chars_floating_point.h> +#include <__charconv/to_chars_integral.h> #include <__charconv/to_chars_result.h> #include <__concepts/arithmetic.h> #include <__concepts/same_as.h> @@ -140,7 +141,7 @@ struct __traits { /// Depending on the maximum size required for a value, the buffer is allocated /// on the stack or the heap. 
template -class _LIBCPP_TEMPLATE_VIS __float_buffer { +class __float_buffer { using _Traits _LIBCPP_NODEBUG = __traits<_Fp>; public: @@ -750,7 +751,7 @@ __format_floating_point(_Tp __value, _FormatContext& __ctx, __format_spec::__par } // namespace __formatter template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS __formatter_floating_point { +struct __formatter_floating_point { public: template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -768,11 +769,11 @@ public: }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; +struct formatter : public __formatter_floating_point<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; +struct formatter : public __formatter_floating_point<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; +struct formatter : public __formatter_floating_point<_CharT> {}; # if _LIBCPP_STD_VER >= 23 template <> diff --git a/lib/libcxx/include/__format/formatter_integer.h b/lib/libcxx/include/__format/formatter_integer.h index 3f51b10d75..cf186c64e3 100644 --- a/lib/libcxx/include/__format/formatter_integer.h +++ b/lib/libcxx/include/__format/formatter_integer.h @@ -30,7 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS __formatter_integer { +struct __formatter_integer { public: template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -58,34 +58,34 @@ public: // Signed integral types. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; # if _LIBCPP_HAS_INT128 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter<__int128_t, _CharT> : public __formatter_integer<_CharT> {}; +struct formatter<__int128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif // Unsigned integral types. 
template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; +struct formatter : public __formatter_integer<_CharT> {}; # if _LIBCPP_HAS_INT128 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter<__uint128_t, _CharT> : public __formatter_integer<_CharT> {}; +struct formatter<__uint128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif # if _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__format/formatter_integral.h b/lib/libcxx/include/__format/formatter_integral.h index 996b7620b3..85f509fad4 100644 --- a/lib/libcxx/include/__format/formatter_integral.h +++ b/lib/libcxx/include/__format/formatter_integral.h @@ -338,7 +338,7 @@ _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer( if (__specs.__std_.__type_ != __format_spec::__type::__hexadecimal_upper_case) [[likely]] return __formatter::__write(__first, __last, __ctx.out(), __specs); - return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, __formatter::__hex_to_upper); + return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, std::__hex_to_upper); } template @@ -404,17 +404,17 @@ __format_integer(_Tp __value, _FormatContext& __ctx, __format_spec::__parsed_spe // template -struct _LIBCPP_TEMPLATE_VIS __bool_strings; +struct __bool_strings; template <> -struct _LIBCPP_TEMPLATE_VIS __bool_strings { +struct __bool_strings { static constexpr string_view __true{"true"}; static constexpr string_view __false{"false"}; }; # if _LIBCPP_HAS_WIDE_CHARACTERS template <> -struct _LIBCPP_TEMPLATE_VIS __bool_strings { +struct __bool_strings { static constexpr wstring_view __true{L"true"}; static constexpr wstring_view __false{L"false"}; }; diff --git a/lib/libcxx/include/__format/formatter_output.h b/lib/libcxx/include/__format/formatter_output.h index e1f1309cd2..cc74e3858a 100644 --- a/lib/libcxx/include/__format/formatter_output.h +++ b/lib/libcxx/include/__format/formatter_output.h @@ -45,24 +45,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __formatter { -_LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char __c) { - switch (__c) { - case 'a': - return 'A'; - case 'b': - return 'B'; - case 'c': - return 'C'; - case 'd': - return 'D'; - case 'e': - return 'E'; - case 'f': - return 'F'; - } - return __c; -} - struct _LIBCPP_EXPORTED_FROM_ABI __padding_size_result { size_t __before_; size_t __after_; diff --git a/lib/libcxx/include/__format/formatter_pointer.h b/lib/libcxx/include/__format/formatter_pointer.h index 4ef48c168d..4c5896d589 100644 --- a/lib/libcxx/include/__format/formatter_pointer.h +++ b/lib/libcxx/include/__format/formatter_pointer.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS __formatter_pointer { +struct 
__formatter_pointer { public: template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -59,11 +59,11 @@ public: // - template<> struct formatter; // - template<> struct formatter; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> {}; +struct formatter : public __formatter_pointer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> {}; +struct formatter : public __formatter_pointer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> {}; +struct formatter : public __formatter_pointer<_CharT> {}; # if _LIBCPP_STD_VER >= 23 template <> diff --git a/lib/libcxx/include/__format/formatter_string.h b/lib/libcxx/include/__format/formatter_string.h index 30084e5822..bad6a4d2bb 100644 --- a/lib/libcxx/include/__format/formatter_string.h +++ b/lib/libcxx/include/__format/formatter_string.h @@ -10,6 +10,7 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_STRING_H #define _LIBCPP___FORMAT_FORMATTER_STRING_H +#include <__assert> #include <__config> #include <__format/concepts.h> #include <__format/format_parse_context.h> @@ -17,6 +18,7 @@ #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> #include <__format/write_escaped.h> +#include #include #include @@ -29,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS __formatter_string { +struct __formatter_string { public: template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -58,7 +60,7 @@ public: // Formatter const char*. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_string<_CharT> { +struct formatter : public __formatter_string<_CharT> { using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template @@ -77,7 +79,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __formatte // Formatter char*. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS formatter<_CharT*, _CharT> : public formatter { +struct formatter<_CharT*, _CharT> : public formatter { using _Base _LIBCPP_NODEBUG = formatter; template @@ -88,20 +90,21 @@ struct _LIBCPP_TEMPLATE_VIS formatter<_CharT*, _CharT> : public formatter -struct _LIBCPP_TEMPLATE_VIS formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> { +struct formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> { using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator format(const _CharT (&__str)[_Size], _FormatContext& __ctx) const { - return _Base::format(basic_string_view<_CharT>(__str, _Size), __ctx); + const _CharT* const __pzero = char_traits<_CharT>::find(__str, _Size, _CharT{}); + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array"); + return _Base::format(basic_string_view<_CharT>(__str, static_cast(__pzero - __str)), __ctx); } }; // Formatter std::string. template <__fmt_char_type _CharT, class _Traits, class _Allocator> -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> - : public __formatter_string<_CharT> { +struct formatter, _CharT> : public __formatter_string<_CharT> { using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template @@ -114,7 +117,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter, // Formatter std::string_view. 
template <__fmt_char_type _CharT, class _Traits> -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_string<_CharT> { +struct formatter, _CharT> : public __formatter_string<_CharT> { using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template @@ -125,6 +128,19 @@ struct _LIBCPP_TEMPLATE_VIS formatter, _CharT } }; +# if _LIBCPP_HAS_WIDE_CHARACTERS +template <> +struct formatter : __disabled_formatter {}; +template <> +struct formatter : __disabled_formatter {}; +template +struct formatter : __disabled_formatter {}; +template +struct formatter, wchar_t> : __disabled_formatter {}; +template +struct formatter, wchar_t> : __disabled_formatter {}; +# endif // _LIBCPP_HAS_WIDE_CHARACTERS + # if _LIBCPP_STD_VER >= 23 template <> inline constexpr bool enable_nonlocking_formatter_optimization = true; diff --git a/lib/libcxx/include/__format/formatter_tuple.h b/lib/libcxx/include/__format/formatter_tuple.h index bb841ef114..0b095e6f71 100644 --- a/lib/libcxx/include/__format/formatter_tuple.h +++ b/lib/libcxx/include/__format/formatter_tuple.h @@ -36,7 +36,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 23 template <__fmt_char_type _CharT, class _Tuple, formattable<_CharT>... _Args> -struct _LIBCPP_TEMPLATE_VIS __formatter_tuple { +struct __formatter_tuple { _LIBCPP_HIDE_FROM_ABI constexpr void set_separator(basic_string_view<_CharT> __separator) noexcept { __separator_ = __separator; } @@ -136,12 +136,10 @@ private: }; template <__fmt_char_type _CharT, formattable<_CharT>... _Args> -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> - : public __formatter_tuple<_CharT, pair<_Args...>, _Args...> {}; +struct formatter, _CharT> : public __formatter_tuple<_CharT, pair<_Args...>, _Args...> {}; template <__fmt_char_type _CharT, formattable<_CharT>... _Args> -struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> - : public __formatter_tuple<_CharT, tuple<_Args...>, _Args...> {}; +struct formatter, _CharT> : public __formatter_tuple<_CharT, tuple<_Args...>, _Args...> {}; #endif // _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__format/indic_conjunct_break_table.h b/lib/libcxx/include/__format/indic_conjunct_break_table.h index df6cfe6a02..f48ea62590 100644 --- a/lib/libcxx/include/__format/indic_conjunct_break_table.h +++ b/lib/libcxx/include/__format/indic_conjunct_break_table.h @@ -107,10 +107,9 @@ enum class __property : uint8_t { /// following benchmark. 
/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp // clang-format off -_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { - 0x00180139, - 0x001a807d, - 0x00241811, +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = { + 0x001801bd, + 0x00241819, 0x002c88b1, 0x002df801, 0x002e0805, @@ -125,6 +124,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { 0x0037500d, 0x00388801, 0x00398069, + 0x003d3029, 0x003f5821, 0x003fe801, 0x0040b00d, @@ -132,87 +132,174 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { 0x00412809, 0x00414811, 0x0042c809, - 0x0044c01d, + 0x0044b821, 0x0046505d, - 0x00471871, + 0x0047187d, 0x0048a890, + 0x0049d001, 0x0049e001, + 0x004a081d, 0x004a6802, - 0x004a880d, + 0x004a8819, 0x004ac01c, + 0x004b1005, 0x004bc01c, + 0x004c0801, 0x004ca84c, 0x004d5018, 0x004d9000, 0x004db00c, 0x004de001, + 0x004df001, + 0x004e080d, 0x004e6802, + 0x004eb801, 0x004ee004, 0x004ef800, + 0x004f1005, 0x004f8004, 0x004ff001, + 0x00500805, 0x0051e001, + 0x00520805, + 0x00523805, + 0x00525809, + 0x00528801, + 0x00538005, + 0x0053a801, + 0x00540805, 0x0054a84c, 0x00555018, 0x00559004, 0x0055a810, 0x0055e001, + 0x00560811, + 0x00563805, 0x00566802, + 0x00571005, 0x0057c800, + 0x0057d015, + 0x00580801, 0x0058a84c, 0x00595018, 0x00599004, 0x0059a810, 0x0059e001, + 0x0059f005, + 0x005a080d, 0x005a6802, + 0x005aa809, 0x005ae004, 0x005af800, + 0x005b1005, 0x005b8800, + 0x005c1001, + 0x005df001, + 0x005e0001, + 0x005e6801, + 0x005eb801, + 0x00600001, + 0x00602001, 0x0060a84c, 0x0061503c, 0x0061e001, + 0x0061f009, + 0x00623009, + 0x00625009, 0x00626802, 0x0062a805, 0x0062c008, + 0x00631005, + 0x00640801, 0x0065e001, + 0x0065f805, + 0x00661001, + 0x00663009, + 0x0066500d, + 0x0066a805, + 0x00671005, + 0x00680005, 0x0068a894, 0x0069d805, + 0x0069f001, + 0x006a080d, 0x006a6802, - 0x0071c009, - 0x0072400d, - 0x0075c009, - 0x0076400d, + 0x006ab801, + 0x006b1005, + 0x006c0801, + 0x006e5001, + 0x006e7801, + 0x006e9009, + 0x006eb001, + 0x006ef801, + 0x00718801, + 0x0071a019, + 0x0072381d, + 0x00758801, + 0x0075a021, + 0x00764019, 0x0078c005, 0x0079a801, 0x0079b801, 0x0079c801, - 0x007b8805, - 0x007ba001, - 0x007bd00d, - 0x007c0001, - 0x007c1009, + 0x007b8835, + 0x007c0011, 0x007c3005, + 0x007c6829, + 0x007cc88d, 0x007e3001, - 0x0081b801, + 0x0081680d, + 0x00819015, 0x0081c805, + 0x0081e805, + 0x0082c005, + 0x0082f009, + 0x0083880d, + 0x00841001, + 0x00842805, 0x00846801, + 0x0084e801, 0x009ae809, - 0x00b8a001, - 0x00be9001, + 0x00b8900d, + 0x00b99009, + 0x00ba9005, + 0x00bb9005, + 0x00bda005, + 0x00bdb819, + 0x00be3001, + 0x00be4829, 0x00bee801, + 0x00c05809, + 0x00c07801, + 0x00c42805, 0x00c54801, + 0x00c90009, + 0x00c93805, + 0x00c99001, 0x00c9c809, 0x00d0b805, + 0x00d0d801, + 0x00d2b001, + 0x00d2c019, 0x00d30001, - 0x00d3a81d, + 0x00d31001, + 0x00d3281d, + 0x00d39825, 0x00d3f801, - 0x00d58035, - 0x00d5f83d, - 0x00d9a001, + 0x00d58079, + 0x00d8000d, + 0x00d9a025, + 0x00da1009, 0x00db5821, - 0x00dd5801, + 0x00dc0005, + 0x00dd100d, + 0x00dd4015, 0x00df3001, - 0x00e1b801, + 0x00df4005, + 0x00df6801, + 0x00df7811, + 0x00e1601d, + 0x00e1b005, 0x00e68009, 0x00e6a031, 0x00e71019, @@ -221,82 +308,193 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { 0x00e7c005, 0x00ee00fd, 0x01006801, - 0x01068031, - 0x01070801, - 0x0107282d, + 0x01068081, 0x01677809, 0x016bf801, 0x016f007d, 0x01815015, 0x0184c805, - 0x05337801, + 0x0533780d, 0x0533a025, 0x0534f005, 0x05378005, + 0x05401001, + 0x05403001, + 0x05405801, + 0x05412805, 
0x05416001, + 0x05462005, 0x05470045, - 0x05495809, + 0x0547f801, + 0x0549301d, + 0x054a3829, + 0x054a9801, + 0x054c0009, 0x054d9801, + 0x054db00d, + 0x054de005, + 0x054e0001, + 0x054f2801, + 0x05514815, + 0x05518805, + 0x0551a805, + 0x05521801, + 0x05526001, + 0x0553e001, 0x05558001, 0x05559009, 0x0555b805, 0x0555f005, 0x05560801, + 0x05576005, 0x0557b001, + 0x055f2801, + 0x055f4001, 0x055f6801, 0x07d8f001, + 0x07f0003d, 0x07f1003d, + 0x07fcf005, 0x080fe801, 0x08170001, 0x081bb011, - 0x08506801, - 0x08507801, + 0x08500809, + 0x08502805, + 0x0850600d, 0x0851c009, 0x0851f801, 0x08572805, 0x0869200d, + 0x086b4811, 0x08755805, - 0x0877e809, + 0x0877e00d, 0x087a3029, 0x087c100d, + 0x08800801, + 0x0881c039, 0x08838001, - 0x0883f801, - 0x0885d001, + 0x08839805, + 0x0883f809, + 0x0885980d, + 0x0885c805, + 0x08861001, 0x08880009, - 0x08899805, + 0x08893811, + 0x0889681d, 0x088b9801, - 0x088e5001, - 0x0891b001, - 0x08974805, + 0x088c0005, + 0x088db021, + 0x088e0001, + 0x088e480d, + 0x088e7801, + 0x08917809, + 0x0891a00d, + 0x0891f001, + 0x08920801, + 0x0896f801, + 0x0897181d, + 0x08980005, 0x0899d805, + 0x0899f001, + 0x089a0001, + 0x089a6801, + 0x089ab801, 0x089b3019, 0x089b8011, + 0x089dc001, + 0x089dd815, + 0x089e1001, + 0x089e2801, + 0x089e3809, + 0x089e7009, + 0x089e9001, + 0x089f0805, + 0x08a1c01d, + 0x08a21009, 0x08a23001, 0x08a2f001, - 0x08a61801, - 0x08ae0001, - 0x08b5b801, - 0x08b95801, - 0x08c1d001, - 0x08c9f001, + 0x08a58001, + 0x08a59815, + 0x08a5d001, + 0x08a5e801, + 0x08a5f805, + 0x08a61005, + 0x08ad7801, + 0x08ad900d, + 0x08ade005, + 0x08adf805, + 0x08aee005, + 0x08b1981d, + 0x08b1e801, + 0x08b1f805, + 0x08b55801, + 0x08b56801, + 0x08b5801d, + 0x08b8e801, + 0x08b8f801, + 0x08b9100d, + 0x08b93811, + 0x08c17821, + 0x08c1c805, + 0x08c98001, + 0x08c9d80d, 0x08ca1801, - 0x08d1a001, + 0x08cea00d, + 0x08ced005, + 0x08cf0001, + 0x08d00825, + 0x08d19815, + 0x08d1d80d, 0x08d23801, - 0x08d4c801, - 0x08ea1001, - 0x08ea2005, + 0x08d28815, + 0x08d2c809, + 0x08d45031, + 0x08d4c005, + 0x08e18019, + 0x08e1c015, + 0x08e1f801, + 0x08e49055, + 0x08e55019, + 0x08e59005, + 0x08e5a805, + 0x08e98815, + 0x08e9d001, + 0x08e9e005, + 0x08e9f819, + 0x08ea3801, + 0x08ec8005, + 0x08eca801, 0x08ecb801, - 0x08fa1001, + 0x08f79805, + 0x08f80005, + 0x08f9b011, + 0x08fa0009, + 0x08fad001, + 0x09a20001, + 0x09a23839, + 0x0b08f02d, + 0x0b096809, 0x0b578011, 0x0b598019, - 0x0de4f001, - 0x0e8b2801, - 0x0e8b3809, - 0x0e8b7011, + 0x0b7a7801, + 0x0b7c780d, + 0x0b7f2001, + 0x0b7f8005, + 0x0de4e805, + 0x0e7800b5, + 0x0e798059, + 0x0e8b2811, + 0x0e8b6815, 0x0e8bd81d, 0x0e8c2819, 0x0e8d500d, 0x0e921009, + 0x0ed000d9, + 0x0ed1d8c5, + 0x0ed3a801, + 0x0ed42001, + 0x0ed4d811, + 0x0ed50839, 0x0f000019, 0x0f004041, 0x0f00d819, @@ -307,8 +505,12 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { 0x0f157001, 0x0f17600d, 0x0f27600d, + 0x0f2f7005, 0x0f468019, - 0x0f4a2019}; + 0x0f4a2019, + 0x0f9fd811, + 0x7001017d, + 0x700803bd}; // clang-format on /// Returns the indic conjuct break property of a code point. diff --git a/lib/libcxx/include/__format/parser_std_format_spec.h b/lib/libcxx/include/__format/parser_std_format_spec.h index 415261acf0..99ab3dc23c 100644 --- a/lib/libcxx/include/__format/parser_std_format_spec.h +++ b/lib/libcxx/include/__format/parser_std_format_spec.h @@ -335,7 +335,7 @@ static_assert(is_trivially_copyable_v<__parsed_specifications>); /// set to zero. That way they can be repurposed if a future revision of the /// Standards adds new fields to std-format-spec. 
template -class _LIBCPP_TEMPLATE_VIS __parser { +class __parser { public: // Parses the format specification. // diff --git a/lib/libcxx/include/__format/range_default_formatter.h b/lib/libcxx/include/__format/range_default_formatter.h index bb4c520f5e..7149debb2f 100644 --- a/lib/libcxx/include/__format/range_default_formatter.h +++ b/lib/libcxx/include/__format/range_default_formatter.h @@ -52,7 +52,7 @@ _LIBCPP_DIAGNOSTIC_POP // There is no definition of this struct, it's purely intended to be used to // generate diagnostics. template -struct _LIBCPP_TEMPLATE_VIS __instantiated_the_primary_template_of_format_kind; +struct __instantiated_the_primary_template_of_format_kind; template constexpr range_format format_kind = [] { @@ -88,12 +88,12 @@ inline constexpr range_format format_kind<_Rp> = [] { }(); template -struct _LIBCPP_TEMPLATE_VIS __range_default_formatter; +struct __range_default_formatter; // Required specializations template -struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { +struct __range_default_formatter { private: using __maybe_const_r _LIBCPP_NODEBUG = __fmt_maybe_const<_Rp, _CharT>; range_formatter>, _CharT> __underlying_; @@ -120,7 +120,7 @@ public: }; template -struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { +struct __range_default_formatter { private: using __maybe_const_map _LIBCPP_NODEBUG = __fmt_maybe_const<_Rp, _CharT>; using __element_type _LIBCPP_NODEBUG = remove_cvref_t>; @@ -148,7 +148,7 @@ public: }; template -struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { +struct __range_default_formatter { private: using __maybe_const_set _LIBCPP_NODEBUG = __fmt_maybe_const<_Rp, _CharT>; using __element_type _LIBCPP_NODEBUG = remove_cvref_t>; @@ -173,7 +173,7 @@ public: template requires(_Kp == range_format::string || _Kp == range_format::debug_string) -struct _LIBCPP_TEMPLATE_VIS __range_default_formatter<_Kp, _Rp, _CharT> { +struct __range_default_formatter<_Kp, _Rp, _CharT> { private: // This deviates from the Standard, there the exposition only type is // formatter, charT> underlying_; @@ -205,7 +205,7 @@ public: template requires(format_kind<_Rp> != range_format::disabled && formattable, _CharT>) -struct _LIBCPP_TEMPLATE_VIS formatter<_Rp, _CharT> : __range_default_formatter, _Rp, _CharT> {}; +struct formatter<_Rp, _CharT> : __range_default_formatter, _Rp, _CharT> {}; #endif // _LIBCPP_STD_VER >= 23 diff --git a/lib/libcxx/include/__format/range_formatter.h b/lib/libcxx/include/__format/range_formatter.h index def55c86ce..0d7fe9970c 100644 --- a/lib/libcxx/include/__format/range_formatter.h +++ b/lib/libcxx/include/__format/range_formatter.h @@ -39,7 +39,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template requires same_as, _Tp> && formattable<_Tp, _CharT> -struct _LIBCPP_TEMPLATE_VIS range_formatter { +struct range_formatter { _LIBCPP_HIDE_FROM_ABI constexpr void set_separator(basic_string_view<_CharT> __separator) noexcept { __separator_ = __separator; } diff --git a/lib/libcxx/include/__format/width_estimation_table.h b/lib/libcxx/include/__format/width_estimation_table.h index 5b4b3950c6..0ea0b4f413 100644 --- a/lib/libcxx/include/__format/width_estimation_table.h +++ b/lib/libcxx/include/__format/width_estimation_table.h @@ -119,7 +119,7 @@ namespace __width_estimation_table { /// - bits [0, 13] The size of the range, allowing 16384 elements. /// - bits [14, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. 
-_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[110] = { 0x0440005f /* 00001100 - 0000115f [ 96] */, // 0x08c68001 /* 0000231a - 0000231b [ 2] */, // 0x08ca4001 /* 00002329 - 0000232a [ 2] */, // @@ -128,8 +128,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = { 0x08fcc000 /* 000023f3 - 000023f3 [ 1] */, // 0x097f4001 /* 000025fd - 000025fe [ 2] */, // 0x09850001 /* 00002614 - 00002615 [ 2] */, // + 0x098c0007 /* 00002630 - 00002637 [ 8] */, // 0x0992000b /* 00002648 - 00002653 [ 12] */, // 0x099fc000 /* 0000267f - 0000267f [ 1] */, // + 0x09a28005 /* 0000268a - 0000268f [ 6] */, // 0x09a4c000 /* 00002693 - 00002693 [ 1] */, // 0x09a84000 /* 000026a1 - 000026a1 [ 1] */, // 0x09aa8001 /* 000026aa - 000026ab [ 2] */, // @@ -163,7 +165,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = { 0x0c264066 /* 00003099 - 000030ff [ 103] */, // 0x0c41402a /* 00003105 - 0000312f [ 43] */, // 0x0c4c405d /* 00003131 - 0000318e [ 94] */, // - 0x0c640053 /* 00003190 - 000031e3 [ 84] */, // + 0x0c640055 /* 00003190 - 000031e5 [ 86] */, // 0x0c7bc02f /* 000031ef - 0000321e [ 48] */, // 0x0c880027 /* 00003220 - 00003247 [ 40] */, // 0x0c943fff /* 00003250 - 0000724f [16384] */, // @@ -182,7 +184,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = { 0x5bfc0001 /* 00016ff0 - 00016ff1 [ 2] */, // 0x5c0017f7 /* 00017000 - 000187f7 [ 6136] */, // 0x620004d5 /* 00018800 - 00018cd5 [ 1238] */, // - 0x63400008 /* 00018d00 - 00018d08 [ 9] */, // + 0x633fc009 /* 00018cff - 00018d08 [ 10] */, // 0x6bfc0003 /* 0001aff0 - 0001aff3 [ 4] */, // 0x6bfd4006 /* 0001aff5 - 0001affb [ 7] */, // 0x6bff4001 /* 0001affd - 0001affe [ 2] */, // @@ -192,6 +194,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = { 0x6c554000 /* 0001b155 - 0001b155 [ 1] */, // 0x6c590003 /* 0001b164 - 0001b167 [ 4] */, // 0x6c5c018b /* 0001b170 - 0001b2fb [ 396] */, // + 0x74c00056 /* 0001d300 - 0001d356 [ 87] */, // + 0x74d80016 /* 0001d360 - 0001d376 [ 23] */, // 0x7c010000 /* 0001f004 - 0001f004 [ 1] */, // 0x7c33c000 /* 0001f0cf - 0001f0cf [ 1] */, // 0x7c638000 /* 0001f18e - 0001f18e [ 1] */, // @@ -213,11 +217,10 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = { 0x7dfc0000 /* 0001f7f0 - 0001f7f0 [ 1] */, // 0x7e4000ff /* 0001f900 - 0001f9ff [ 256] */, // 0x7e9c000c /* 0001fa70 - 0001fa7c [ 13] */, // - 0x7ea00008 /* 0001fa80 - 0001fa88 [ 9] */, // - 0x7ea4002d /* 0001fa90 - 0001fabd [ 46] */, // - 0x7eafc006 /* 0001fabf - 0001fac5 [ 7] */, // - 0x7eb3800d /* 0001face - 0001fadb [ 14] */, // - 0x7eb80008 /* 0001fae0 - 0001fae8 [ 9] */, // + 0x7ea00009 /* 0001fa80 - 0001fa89 [ 10] */, // + 0x7ea3c037 /* 0001fa8f - 0001fac6 [ 56] */, // + 0x7eb3800e /* 0001face - 0001fadc [ 15] */, // + 0x7eb7c00a /* 0001fadf - 0001fae9 [ 11] */, // 0x7ebc0008 /* 0001faf0 - 0001faf8 [ 9] */, // 0x80003fff /* 00020000 - 00023fff [16384] */, // 0x90003fff /* 00024000 - 00027fff [16384] */, // diff --git a/lib/libcxx/include/__functional/binary_function.h b/lib/libcxx/include/__functional/binary_function.h index bde8b03ef8..531274b7d4 100644 --- a/lib/libcxx/include/__functional/binary_function.h +++ b/lib/libcxx/include/__functional/binary_function.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION) template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binary_function { +struct 
_LIBCPP_DEPRECATED_IN_CXX11 binary_function { typedef _Arg1 first_argument_type; typedef _Arg2 second_argument_type; typedef _Result result_type; @@ -39,11 +39,10 @@ struct __binary_function_keep_layout_base { }; #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION) -_LIBCPP_DIAGNOSTIC_PUSH -_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations") +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template using __binary_function _LIBCPP_NODEBUG = binary_function<_Arg1, _Arg2, _Result>; -_LIBCPP_DIAGNOSTIC_POP +_LIBCPP_SUPPRESS_DEPRECATED_POP #else template using __binary_function _LIBCPP_NODEBUG = __binary_function_keep_layout_base<_Arg1, _Arg2, _Result>; diff --git a/lib/libcxx/include/__functional/binary_negate.h b/lib/libcxx/include/__functional/binary_negate.h index ce52b5ae9f..73acfb37a7 100644 --- a/lib/libcxx/include/__functional/binary_negate.h +++ b/lib/libcxx/include/__functional/binary_negate.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 binary_negate +class _LIBCPP_DEPRECATED_IN_CXX17 binary_negate : public __binary_function { diff --git a/lib/libcxx/include/__functional/bind.h b/lib/libcxx/include/__functional/bind.h index a3c327ab40..596cce03cd 100644 --- a/lib/libcxx/include/__functional/bind.h +++ b/lib/libcxx/include/__functional/bind.h @@ -130,7 +130,7 @@ struct __mu_return_invokable // false template struct __mu_return_invokable { - using type = __invoke_result_t<_Ti&, _Uj...>; + using type _LIBCPP_NODEBUG = __invoke_result_t<_Ti&, _Uj...>; }; template @@ -181,12 +181,12 @@ struct __bind_return; template struct __bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj, true> { - using type = __invoke_result_t< _Fp&, typename __mu_return< _BoundArgs, _TupleUj >::type... >; + using type _LIBCPP_NODEBUG = __invoke_result_t<_Fp&, typename __mu_return<_BoundArgs, _TupleUj>::type...>; }; template struct __bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj, true> { - using type = __invoke_result_t< _Fp&, typename __mu_return< const _BoundArgs, _TupleUj >::type... 
>; + using type _LIBCPP_NODEBUG = __invoke_result_t<_Fp&, typename __mu_return::type...>; }; template diff --git a/lib/libcxx/include/__functional/binder1st.h b/lib/libcxx/include/__functional/binder1st.h index 04b51fefab..77a0322450 100644 --- a/lib/libcxx/include/__functional/binder1st.h +++ b/lib/libcxx/include/__functional/binder1st.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder1st +class _LIBCPP_DEPRECATED_IN_CXX11 binder1st : public __unary_function { protected: _Operation op; diff --git a/lib/libcxx/include/__functional/binder2nd.h b/lib/libcxx/include/__functional/binder2nd.h index 9d22e4430b..8cc644151a 100644 --- a/lib/libcxx/include/__functional/binder2nd.h +++ b/lib/libcxx/include/__functional/binder2nd.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 binder2nd +class _LIBCPP_DEPRECATED_IN_CXX11 binder2nd : public __unary_function { protected: _Operation op; diff --git a/lib/libcxx/include/__functional/boyer_moore_searcher.h b/lib/libcxx/include/__functional/boyer_moore_searcher.h index 1e49cc5464..6c51c32d2f 100644 --- a/lib/libcxx/include/__functional/boyer_moore_searcher.h +++ b/lib/libcxx/include/__functional/boyer_moore_searcher.h @@ -17,12 +17,10 @@ #include <__config> #include <__functional/hash.h> #include <__functional/operations.h> -#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__memory/shared_ptr.h> #include <__type_traits/make_unsigned.h> #include <__utility/pair.h> -#include <__vector/vector.h> #include #include #include @@ -88,7 +86,7 @@ public: template ::value_type>, class _BinaryPredicate = equal_to<>> -class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher { +class boyer_moore_searcher { private: using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; @@ -125,8 +123,8 @@ public: template _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { - static_assert(__is_same_uncvref::value_type, - typename iterator_traits<_RandomAccessIterator2>::value_type>::value, + static_assert(is_same_v<__remove_cvref_t::value_type>, + __remove_cvref_t::value_type>>, "Corpus and Pattern iterators must point to the same type"); if (__first == __last) return std::make_pair(__last, __last); @@ -196,7 +194,7 @@ private: if (__count == 0) return; - vector __scratch(__count); + auto __scratch = std::make_unique(__count); __compute_bm_prefix(__first, __last, __pred, __scratch); for (size_t __i = 0; __i <= __count; ++__i) @@ -219,7 +217,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); template ::value_type>, class _BinaryPredicate = equal_to<>> -class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher { +class boyer_moore_horspool_searcher { private: using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; @@ -256,8 +254,8 @@ public: template _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { - 
static_assert(__is_same_uncvref::value_type, - typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, + static_assert(is_same_v<__remove_cvref_t::value_type>, + __remove_cvref_t::value_type>>, "Corpus and Pattern iterators must point to the same type"); if (__first == __last) return std::make_pair(__last, __last); diff --git a/lib/libcxx/include/__functional/default_searcher.h b/lib/libcxx/include/__functional/default_searcher.h index db89d10757..afbf07e238 100644 --- a/lib/libcxx/include/__functional/default_searcher.h +++ b/lib/libcxx/include/__functional/default_searcher.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // default searcher template > -class _LIBCPP_TEMPLATE_VIS default_searcher { +class default_searcher { public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 default_searcher(_ForwardIterator __f, _ForwardIterator __l, _BinaryPredicate __p = _BinaryPredicate()) diff --git a/lib/libcxx/include/__functional/function.h b/lib/libcxx/include/__functional/function.h index 2a1293cfcc..dc112ebfd0 100644 --- a/lib/libcxx/include/__functional/function.h +++ b/lib/libcxx/include/__functional/function.h @@ -17,13 +17,7 @@ #include <__functional/binary_function.h> #include <__functional/invoke.h> #include <__functional/unary_function.h> -#include <__iterator/iterator_traits.h> #include <__memory/addressof.h> -#include <__memory/allocator.h> -#include <__memory/allocator_destructor.h> -#include <__memory/allocator_traits.h> -#include <__memory/compressed_pair.h> -#include <__memory/unique_ptr.h> #include <__type_traits/aligned_storage.h> #include <__type_traits/decay.h> #include <__type_traits/is_core_convertible.h> @@ -34,9 +28,7 @@ #include <__type_traits/strip_signature.h> #include <__utility/forward.h> #include <__utility/move.h> -#include <__utility/piecewise_construct.h> #include <__utility/swap.h> -#include <__verbose_abort> #include #include @@ -71,7 +63,7 @@ public: _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~bad_function_call() _NOEXCEPT override {} # endif -# ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE +# if _LIBCPP_AVAILABILITY_HAS_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE const char* what() const _NOEXCEPT override; # endif }; @@ -86,7 +78,7 @@ _LIBCPP_DIAGNOSTIC_POP } template -class _LIBCPP_TEMPLATE_VIS function; // undefined +class function; // undefined namespace __function { @@ -122,7 +114,7 @@ _LIBCPP_HIDE_FROM_ABI bool __not_null(function<_Fp> const& __f) { return !!__f; } -# if _LIBCPP_HAS_EXTENSION_BLOCKS +# if __has_extension(blocks) template _LIBCPP_HIDE_FROM_ABI bool __not_null(_Rp (^__p)(_Args...)) { return __p; @@ -133,108 +125,10 @@ _LIBCPP_HIDE_FROM_ABI bool __not_null(_Rp (^__p)(_Args...)) { namespace __function { -// __alloc_func holds a functor and an allocator. - -template -class __alloc_func; -template -class __default_alloc_func; - -template -class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)> { - _LIBCPP_COMPRESSED_PAIR(_Fp, __func_, _Ap, __alloc_); - -public: - using _Target _LIBCPP_NODEBUG = _Fp; - using _Alloc _LIBCPP_NODEBUG = _Ap; - - _LIBCPP_HIDE_FROM_ABI const _Target& __target() const { return __func_; } - - // WIN32 APIs may define __allocator, so use __get_allocator instead. 
- _LIBCPP_HIDE_FROM_ABI const _Alloc& __get_allocator() const { return __alloc_; } - - _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(_Target&& __f) : __func_(std::move(__f)), __alloc_() {} - - _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(const _Target& __f, const _Alloc& __a) : __func_(__f), __alloc_(__a) {} - - _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(const _Target& __f, _Alloc&& __a) - : __func_(__f), __alloc_(std::move(__a)) {} - - _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(_Target&& __f, _Alloc&& __a) - : __func_(std::move(__f)), __alloc_(std::move(__a)) {} - - _LIBCPP_HIDE_FROM_ABI _Rp operator()(_ArgTypes&&... __arg) { - return std::__invoke_r<_Rp>(__func_, std::forward<_ArgTypes>(__arg)...); - } - - _LIBCPP_HIDE_FROM_ABI __alloc_func* __clone() const { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef __rebind_alloc<__alloc_traits, __alloc_func> _AA; - _AA __a(__alloc_); - typedef __allocator_destructor<_AA> _Dp; - unique_ptr<__alloc_func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new ((void*)__hold.get()) __alloc_func(__func_, _Alloc(__a)); - return __hold.release(); - } - - _LIBCPP_HIDE_FROM_ABI void destroy() _NOEXCEPT { - __func_.~_Fp(); - __alloc_.~_Alloc(); - } - - _LIBCPP_HIDE_FROM_ABI static void __destroy_and_delete(__alloc_func* __f) { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef __rebind_alloc<__alloc_traits, __alloc_func> _FunAlloc; - _FunAlloc __a(__f->__get_allocator()); - __f->destroy(); - __a.deallocate(__f, 1); - } -}; - -template -struct __deallocating_deleter { - _LIBCPP_HIDE_FROM_ABI void operator()(void* __p) const { - std::__libcpp_deallocate<_Tp>(static_cast<_Tp*>(__p), __element_count(1)); - } -}; - -template -class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> { - _Fp __f_; - -public: - using _Target _LIBCPP_NODEBUG = _Fp; - - _LIBCPP_HIDE_FROM_ABI const _Target& __target() const { return __f_; } - - _LIBCPP_HIDE_FROM_ABI explicit __default_alloc_func(_Target&& __f) : __f_(std::move(__f)) {} - - _LIBCPP_HIDE_FROM_ABI explicit __default_alloc_func(const _Target& __f) : __f_(__f) {} - - _LIBCPP_HIDE_FROM_ABI _Rp operator()(_ArgTypes&&... __arg) { - return std::__invoke_r<_Rp>(__f_, std::forward<_ArgTypes>(__arg)...); - } - - _LIBCPP_HIDE_FROM_ABI __default_alloc_func* __clone() const { - using _Self = __default_alloc_func; - unique_ptr<_Self, __deallocating_deleter<_Self>> __hold(std::__libcpp_allocate<_Self>(__element_count(1))); - _Self* __res = ::new ((void*)__hold.get()) _Self(__f_); - (void)__hold.release(); - return __res; - } - - _LIBCPP_HIDE_FROM_ABI void destroy() _NOEXCEPT { __f_.~_Target(); } - - _LIBCPP_HIDE_FROM_ABI static void __destroy_and_delete(__default_alloc_func* __f) { - __f->destroy(); - std::__libcpp_deallocate<__default_alloc_func>(__f, __element_count(1)); - } -}; - // __base provides an abstract interface for copyable functors. template -class _LIBCPP_TEMPLATE_VIS __base; +class __base; template class __base<_Rp(_ArgTypes...)> { @@ -257,84 +151,38 @@ public: // __func implements __base for a given functor type. 
-template +template class __func; -template -class __func<_Fp, _Alloc, _Rp(_ArgTypes...)> : public __base<_Rp(_ArgTypes...)> { - __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> __f_; +template +class __func<_Fp, _Rp(_ArgTypes...)> : public __base<_Rp(_ArgTypes...)> { + _Fp __func_; public: - _LIBCPP_HIDE_FROM_ABI explicit __func(_Fp&& __f) : __f_(std::move(__f)) {} + _LIBCPP_HIDE_FROM_ABI explicit __func(_Fp&& __f) : __func_(std::move(__f)) {} + _LIBCPP_HIDE_FROM_ABI explicit __func(const _Fp& __f) : __func_(__f) {} - _LIBCPP_HIDE_FROM_ABI explicit __func(const _Fp& __f, const _Alloc& __a) : __f_(__f, __a) {} + _LIBCPP_HIDE_FROM_ABI_VIRTUAL __base<_Rp(_ArgTypes...)>* __clone() const override { return new __func(__func_); } - _LIBCPP_HIDE_FROM_ABI explicit __func(const _Fp& __f, _Alloc&& __a) : __f_(__f, std::move(__a)) {} + _LIBCPP_HIDE_FROM_ABI_VIRTUAL void __clone(__base<_Rp(_ArgTypes...)>* __p) const override { + ::new ((void*)__p) __func(__func_); + } - _LIBCPP_HIDE_FROM_ABI explicit __func(_Fp&& __f, _Alloc&& __a) : __f_(std::move(__f), std::move(__a)) {} - - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual __base<_Rp(_ArgTypes...)>* __clone() const; - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void __clone(__base<_Rp(_ArgTypes...)>*) const; - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy_deallocate() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual _Rp operator()(_ArgTypes&&... __arg); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL void destroy() _NOEXCEPT override { __func_.~_Fp(); } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL void destroy_deallocate() _NOEXCEPT override { delete this; } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL _Rp operator()(_ArgTypes&&... __arg) override { + return std::__invoke_r<_Rp>(__func_, std::forward<_ArgTypes>(__arg)...); + } # if _LIBCPP_HAS_RTTI - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const void* target(const type_info&) const _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const std::type_info& target_type() const _NOEXCEPT; + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const void* target(const type_info& __ti) const _NOEXCEPT override { + if (__ti == typeid(_Fp)) + return std::addressof(__func_); + return nullptr; + } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const std::type_info& target_type() const _NOEXCEPT override { return typeid(_Fp); } # endif // _LIBCPP_HAS_RTTI }; -template -__base<_Rp(_ArgTypes...)>* __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone() const { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef __rebind_alloc<__alloc_traits, __func> _Ap; - _Ap __a(__f_.__get_allocator()); - typedef __allocator_destructor<_Ap> _Dp; - unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new ((void*)__hold.get()) __func(__f_.__target(), _Alloc(__a)); - return __hold.release(); -} - -template -void __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone(__base<_Rp(_ArgTypes...)>* __p) const { - ::new ((void*)__p) __func(__f_.__target(), __f_.__get_allocator()); -} - -template -void __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy() _NOEXCEPT { - __f_.destroy(); -} - -template -void __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy_deallocate() _NOEXCEPT { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef __rebind_alloc<__alloc_traits, __func> _Ap; - _Ap __a(__f_.__get_allocator()); - __f_.destroy(); - __a.deallocate(this, 1); -} - -template -_Rp __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&&... 
__arg) { - return __f_(std::forward<_ArgTypes>(__arg)...); -} - -# if _LIBCPP_HAS_RTTI - -template -const void* __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target(const type_info& __ti) const _NOEXCEPT { - if (__ti == typeid(_Fp)) - return std::addressof(__f_.__target()); - return nullptr; -} - -template -const std::type_info& __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target_type() const _NOEXCEPT { - return typeid(_Fp); -} - -# endif // _LIBCPP_HAS_RTTI - // __value_func creates a value-type from a __func. template @@ -354,29 +202,19 @@ class __value_func<_Rp(_ArgTypes...)> { public: _LIBCPP_HIDE_FROM_ABI __value_func() _NOEXCEPT : __f_(nullptr) {} - template - _LIBCPP_HIDE_FROM_ABI __value_func(_Fp&& __f, const _Alloc& __a) : __f_(nullptr) { - typedef allocator_traits<_Alloc> __alloc_traits; - typedef __function::__func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun; - typedef __rebind_alloc<__alloc_traits, _Fun> _FunAlloc; + template , __value_func>::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI explicit __value_func(_Fp&& __f) : __f_(nullptr) { + typedef __function::__func<_Fp, _Rp(_ArgTypes...)> _Fun; if (__function::__not_null(__f)) { - _FunAlloc __af(__a); - if (sizeof(_Fun) <= sizeof(__buf_) && is_nothrow_copy_constructible<_Fp>::value && - is_nothrow_copy_constructible<_FunAlloc>::value) { - __f_ = ::new ((void*)&__buf_) _Fun(std::move(__f), _Alloc(__af)); + if (sizeof(_Fun) <= sizeof(__buf_) && is_nothrow_copy_constructible<_Fp>::value) { + __f_ = ::new (std::addressof(__buf_)) _Fun(std::move(__f)); } else { - typedef __allocator_destructor<_FunAlloc> _Dp; - unique_ptr<__func, _Dp> __hold(__af.allocate(1), _Dp(__af, 1)); - ::new ((void*)__hold.get()) _Fun(std::move(__f), _Alloc(__a)); - __f_ = __hold.release(); + __f_ = new _Fun(std::move(__f)); } } } - template , __value_func>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI explicit __value_func(_Fp&& __f) : __value_func(std::forward<_Fp>(__f), allocator<_Fp>()) {} - _LIBCPP_HIDE_FROM_ABI __value_func(const __value_func& __f) { if (__f.__f_ == nullptr) __f_ = nullptr; @@ -432,12 +270,12 @@ public: _LIBCPP_HIDE_FROM_ABI _Rp operator()(_ArgTypes&&... 
__args) const { if (__f_ == nullptr) - __throw_bad_function_call(); + std::__throw_bad_function_call(); return (*__f_)(std::forward<_ArgTypes>(__args)...); } _LIBCPP_HIDE_FROM_ABI void swap(__value_func& __f) _NOEXCEPT { - if (&__f == this) + if (std::addressof(__f) == this) return; if ((void*)__f_ == &__buf_ && (void*)__f.__f_ == &__f.__buf_) { _LIBCPP_SUPPRESS_DEPRECATED_PUSH @@ -539,22 +377,22 @@ private: template _LIBCPP_HIDE_FROM_ABI static void* __large_clone(const void* __s) { const _Fun* __f = static_cast(__s); - return __f->__clone(); + return new _Fun(*__f); } template _LIBCPP_HIDE_FROM_ABI static void __large_destroy(void* __s) { - _Fun::__destroy_and_delete(static_cast<_Fun*>(__s)); + delete static_cast<_Fun*>(__s); } template _LIBCPP_HIDE_FROM_ABI static const __policy* __choose_policy(/* is_small = */ false_type) { static constexpr __policy __policy = { - &__large_clone<_Fun>, - &__large_destroy<_Fun>, + std::addressof(__large_clone<_Fun>), + std::addressof(__large_destroy<_Fun>), false, # if _LIBCPP_HAS_RTTI - &typeid(typename _Fun::_Target) + &typeid(_Fun) # else nullptr # endif @@ -569,7 +407,7 @@ private: nullptr, false, # if _LIBCPP_HAS_RTTI - &typeid(typename _Fun::_Target) + &typeid(_Fun) # else nullptr # endif @@ -583,42 +421,7 @@ private: template using __fast_forward _LIBCPP_NODEBUG = __conditional_t::value, _Tp, _Tp&&>; -// __policy_invoker calls an instance of __alloc_func held in __policy_storage. - -template -struct __policy_invoker; - -template -struct __policy_invoker<_Rp(_ArgTypes...)> { - typedef _Rp (*__Call)(const __policy_storage*, __fast_forward<_ArgTypes>...); - - __Call __call_; - - // Creates an invoker that throws bad_function_call. - _LIBCPP_HIDE_FROM_ABI __policy_invoker() : __call_(&__call_empty) {} - - // Creates an invoker that calls the given instance of __func. - template - _LIBCPP_HIDE_FROM_ABI static __policy_invoker __create() { - return __policy_invoker(&__call_impl<_Fun>); - } - -private: - _LIBCPP_HIDE_FROM_ABI explicit __policy_invoker(__Call __c) : __call_(__c) {} - - _LIBCPP_HIDE_FROM_ABI static _Rp __call_empty(const __policy_storage*, __fast_forward<_ArgTypes>...) { - __throw_bad_function_call(); - } - - template - _LIBCPP_HIDE_FROM_ABI static _Rp __call_impl(const __policy_storage* __buf, __fast_forward<_ArgTypes>... __args) { - _Fun* __f = reinterpret_cast<_Fun*>(__use_small_storage<_Fun>::value ? &__buf->__small : __buf->__large); - return (*__f)(std::forward<_ArgTypes>(__args)...); - } -}; - -// __policy_func uses a __policy and __policy_invoker to create a type-erased, -// copyable functor. +// __policy_func uses a __policy to create a type-erased, copyable functor. template class __policy_func; @@ -628,69 +431,52 @@ class __policy_func<_Rp(_ArgTypes...)> { // Inline storage for small objects. __policy_storage __buf_; - // Calls the value stored in __buf_. This could technically be part of - // policy, but storing it here eliminates a level of indirection inside - // operator(). - typedef __function::__policy_invoker<_Rp(_ArgTypes...)> __invoker; - __invoker __invoker_; + using _ErasedFunc _LIBCPP_NODEBUG = _Rp(const __policy_storage*, __fast_forward<_ArgTypes>...); + + _ErasedFunc* __func_; // The policy that describes how to move / copy / destroy __buf_. Never // null, even if the function is empty. 
const __policy* __policy_; -public: - _LIBCPP_HIDE_FROM_ABI __policy_func() : __policy_(__policy::__create_empty()) {} - - template - _LIBCPP_HIDE_FROM_ABI __policy_func(_Fp&& __f, const _Alloc& __a) : __policy_(__policy::__create_empty()) { - typedef __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun; - typedef allocator_traits<_Alloc> __alloc_traits; - typedef __rebind_alloc<__alloc_traits, _Fun> _FunAlloc; - - if (__function::__not_null(__f)) { - __invoker_ = __invoker::template __create<_Fun>(); - __policy_ = __policy::__create<_Fun>(); - - _FunAlloc __af(__a); - if (__use_small_storage<_Fun>()) { - ::new ((void*)&__buf_.__small) _Fun(std::move(__f), _Alloc(__af)); - } else { - typedef __allocator_destructor<_FunAlloc> _Dp; - unique_ptr<_Fun, _Dp> __hold(__af.allocate(1), _Dp(__af, 1)); - ::new ((void*)__hold.get()) _Fun(std::move(__f), _Alloc(__af)); - __buf_.__large = __hold.release(); - } - } + _LIBCPP_HIDE_FROM_ABI static _Rp __empty_func(const __policy_storage*, __fast_forward<_ArgTypes>...) { + std::__throw_bad_function_call(); } + template + _LIBCPP_HIDE_FROM_ABI static _Rp __call_func(const __policy_storage* __buf, __fast_forward<_ArgTypes>... __args) { + _Fun* __func = reinterpret_cast<_Fun*>(__use_small_storage<_Fun>::value ? &__buf->__small : __buf->__large); + + return std::__invoke_r<_Rp>(*__func, std::forward<_ArgTypes>(__args)...); + } + +public: + _LIBCPP_HIDE_FROM_ABI __policy_func() : __func_(__empty_func), __policy_(__policy::__create_empty()) {} + template , __policy_func>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI explicit __policy_func(_Fp&& __f) : __policy_(__policy::__create_empty()) { - typedef __default_alloc_func<_Fp, _Rp(_ArgTypes...)> _Fun; - if (__function::__not_null(__f)) { - __invoker_ = __invoker::template __create<_Fun>(); - __policy_ = __policy::__create<_Fun>(); - if (__use_small_storage<_Fun>()) { - ::new ((void*)&__buf_.__small) _Fun(std::move(__f)); + __func_ = __call_func<_Fp>; + __policy_ = __policy::__create<_Fp>(); + if (__use_small_storage<_Fp>()) { + ::new ((void*)&__buf_.__small) _Fp(std::move(__f)); } else { - unique_ptr<_Fun, __deallocating_deleter<_Fun>> __hold(std::__libcpp_allocate<_Fun>(__element_count(1))); - __buf_.__large = ::new ((void*)__hold.get()) _Fun(std::move(__f)); - (void)__hold.release(); + __buf_.__large = ::new _Fp(std::move(__f)); } } } _LIBCPP_HIDE_FROM_ABI __policy_func(const __policy_func& __f) - : __buf_(__f.__buf_), __invoker_(__f.__invoker_), __policy_(__f.__policy_) { + : __buf_(__f.__buf_), __func_(__f.__func_), __policy_(__f.__policy_) { if (__policy_->__clone) __buf_.__large = __policy_->__clone(__f.__buf_.__large); } _LIBCPP_HIDE_FROM_ABI __policy_func(__policy_func&& __f) - : __buf_(__f.__buf_), __invoker_(__f.__invoker_), __policy_(__f.__policy_) { + : __buf_(__f.__buf_), __func_(__f.__func_), __policy_(__f.__policy_) { if (__policy_->__destroy) { - __f.__policy_ = __policy::__create_empty(); - __f.__invoker_ = __invoker(); + __f.__policy_ = __policy::__create_empty(); + __f.__func_ = {}; } } @@ -700,30 +486,30 @@ public: } _LIBCPP_HIDE_FROM_ABI __policy_func& operator=(__policy_func&& __f) { - *this = nullptr; - __buf_ = __f.__buf_; - __invoker_ = __f.__invoker_; - __policy_ = __f.__policy_; - __f.__policy_ = __policy::__create_empty(); - __f.__invoker_ = __invoker(); + *this = nullptr; + __buf_ = __f.__buf_; + __func_ = __f.__func_; + __policy_ = __f.__policy_; + __f.__policy_ = __policy::__create_empty(); + __f.__func_ = {}; return *this; } _LIBCPP_HIDE_FROM_ABI __policy_func& operator=(nullptr_t) { const 
__policy* __p = __policy_; __policy_ = __policy::__create_empty(); - __invoker_ = __invoker(); + __func_ = {}; if (__p->__destroy) __p->__destroy(__buf_.__large); return *this; } _LIBCPP_HIDE_FROM_ABI _Rp operator()(_ArgTypes&&... __args) const { - return __invoker_.__call_(std::addressof(__buf_), std::forward<_ArgTypes>(__args)...); + return __func_(std::addressof(__buf_), std::forward<_ArgTypes>(__args)...); } _LIBCPP_HIDE_FROM_ABI void swap(__policy_func& __f) { - std::swap(__invoker_, __f.__invoker_); + std::swap(__func_, __f.__func_); std::swap(__policy_, __f.__policy_); std::swap(__buf_, __f.__buf_); } @@ -750,14 +536,14 @@ public: extern "C" void* _Block_copy(const void*); extern "C" void _Block_release(const void*); -template -class __func<_Rp1 (^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> : public __base<_Rp(_ArgTypes...)> { +template +class __func<_Rp1 (^)(_ArgTypes1...), _Rp(_ArgTypes...)> : public __base<_Rp(_ArgTypes...)> { typedef _Rp1 (^__block_type)(_ArgTypes1...); __block_type __f_; public: _LIBCPP_HIDE_FROM_ABI explicit __func(__block_type const& __f) -# if _LIBCPP_HAS_OBJC_ARC +# if __has_feature(objc_arc) : __f_(__f) # else : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) @@ -767,15 +553,6 @@ public: // [TODO] add && to save on a retain - _LIBCPP_HIDE_FROM_ABI explicit __func(__block_type __f, const _Alloc& /* unused */) -# if _LIBCPP_HAS_OBJC_ARC - : __f_(__f) -# else - : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) -# endif - { - } - _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual __base<_Rp(_ArgTypes...)>* __clone() const { _LIBCPP_ASSERT_INTERNAL( false, @@ -790,7 +567,7 @@ public: } _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy() _NOEXCEPT { -# if !_LIBCPP_HAS_OBJC_ARC +# if !__has_feature(objc_arc) if (__f_) _Block_release(__f_); # endif @@ -822,12 +599,12 @@ public: # endif // _LIBCPP_HAS_RTTI }; -# endif // _LIBCPP_HAS_EXTENSION_BLOCKS +# endif // _LIBCPP_HAS_BLOCKS_RUNTIME } // namespace __function template -class _LIBCPP_TEMPLATE_VIS function<_Rp(_ArgTypes...)> +class function<_Rp(_ArgTypes...)> : public __function::__maybe_derive_from_unary_function<_Rp(_ArgTypes...)>, public __function::__maybe_derive_from_binary_function<_Rp(_ArgTypes...)> { # ifndef _LIBCPP_ABI_OPTIMIZED_FUNCTION @@ -954,7 +731,7 @@ function<_Rp(_ArgTypes...)>::function(_Fp __f) : __f_(std::move(__f)) {} # if _LIBCPP_STD_VER <= 14 template template -function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc& __a, _Fp __f) : __f_(std::move(__f), __a) {} +function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&, _Fp __f) : __f_(std::move(__f)) {} # endif template diff --git a/lib/libcxx/include/__functional/hash.h b/lib/libcxx/include/__functional/hash.h index 28b2635ab1..489a6f00b8 100644 --- a/lib/libcxx/include/__functional/hash.h +++ b/lib/libcxx/include/__functional/hash.h @@ -13,11 +13,14 @@ #include <__cstddef/nullptr_t.h> #include <__functional/unary_function.h> #include <__fwd/functional.h> +#include <__memory/addressof.h> #include <__type_traits/conjunction.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_enum.h> +#include <__type_traits/is_floating_point.h> +#include <__type_traits/is_integral.h> #include <__type_traits/underlying_type.h> #include <__utility/pair.h> #include <__utility/swap.h> @@ -33,7 +36,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template inline _LIBCPP_HIDE_FROM_ABI _Size __loadword(const void* __p) { _Size 
__r; - std::memcpy(&__r, __p, sizeof(__r)); + std::memcpy(std::addressof(__r), __p, sizeof(__r)); return __r; } @@ -63,10 +66,10 @@ struct __murmur2_or_cityhash<_Size, 32> { switch (__len) { case 3: __h ^= static_cast<_Size>(__data[2] << 16); - _LIBCPP_FALLTHROUGH(); + [[__fallthrough__]]; case 2: __h ^= static_cast<_Size>(__data[1] << 8); - _LIBCPP_FALLTHROUGH(); + [[__fallthrough__]]; case 1: __h ^= __data[0]; __h *= __m; @@ -237,6 +240,14 @@ private: } }; +#if _LIBCPP_AVAILABILITY_HAS_HASH_MEMORY +[[__gnu__::__pure__]] _LIBCPP_EXPORTED_FROM_ABI size_t __hash_memory(_LIBCPP_NOESCAPE const void*, size_t) _NOEXCEPT; +#else +_LIBCPP_HIDE_FROM_ABI inline size_t __hash_memory(const void* __ptr, size_t __size) _NOEXCEPT { + return __murmur2_or_cityhash()(__ptr, __size); +} +#endif + template struct __scalar_hash; @@ -276,7 +287,7 @@ struct __scalar_hash<_Tp, 2> : public __unary_function<_Tp, size_t> { } __s; } __u; __u.__t = __v; - return __murmur2_or_cityhash()(&__u, sizeof(__u)); + return std::__hash_memory(std::addressof(__u), sizeof(__u)); } }; @@ -292,7 +303,7 @@ struct __scalar_hash<_Tp, 3> : public __unary_function<_Tp, size_t> { } __s; } __u; __u.__t = __v; - return __murmur2_or_cityhash()(&__u, sizeof(__u)); + return std::__hash_memory(std::addressof(__u), sizeof(__u)); } }; @@ -309,7 +320,7 @@ struct __scalar_hash<_Tp, 4> : public __unary_function<_Tp, size_t> { } __s; } __u; __u.__t = __v; - return __murmur2_or_cityhash()(&__u, sizeof(__u)); + return std::__hash_memory(std::addressof(__u), sizeof(__u)); } }; @@ -325,133 +336,54 @@ _LIBCPP_HIDE_FROM_ABI inline size_t __hash_combine(size_t __lhs, size_t __rhs) _ } template -struct _LIBCPP_TEMPLATE_VIS hash<_Tp*> : public __unary_function<_Tp*, size_t> { +struct hash<_Tp*> : public __unary_function<_Tp*, size_t> { _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp* __v) const _NOEXCEPT { union { _Tp* __t; size_t __a; } __u; __u.__t = __v; - return __murmur2_or_cityhash()(&__u, sizeof(__u)); + return std::__hash_memory(std::addressof(__u), sizeof(__u)); } }; -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(bool __v) const _NOEXCEPT { return static_cast(__v); } +template +struct __hash_impl { + __hash_impl() = delete; + __hash_impl(__hash_impl const&) = delete; + __hash_impl& operator=(__hash_impl const&) = delete; }; -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(char __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(signed char __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned char __v) const _NOEXCEPT { return static_cast(__v); } -}; - -#if _LIBCPP_HAS_CHAR8_T -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(char8_t __v) const _NOEXCEPT { return static_cast(__v); } -}; -#endif // _LIBCPP_HAS_CHAR8_T - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(char16_t __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(char32_t __v) const _NOEXCEPT { return static_cast(__v); } -}; - -#if 
_LIBCPP_HAS_WIDE_CHARACTERS -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(wchar_t __v) const _NOEXCEPT { return static_cast(__v); } -}; -#endif // _LIBCPP_HAS_WIDE_CHARACTERS - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(short __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned short __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(int __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned int __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(long __v) const _NOEXCEPT { return static_cast(__v); } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned long __v) const _NOEXCEPT { - static_assert(sizeof(size_t) >= sizeof(unsigned long), - "This would be a terrible hash function on a platform where size_t is smaller than unsigned long"); - return static_cast(__v); +template +struct __hash_impl<_Tp, __enable_if_t::value> > : __unary_function<_Tp, size_t> { + _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT { + using type = __underlying_type_t<_Tp>; + return hash()(static_cast(__v)); } }; -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash {}; +template +struct __hash_impl<_Tp, __enable_if_t::value && (sizeof(_Tp) <= sizeof(size_t))> > + : __unary_function<_Tp, size_t> { + _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT { return static_cast(__v); } +}; -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash {}; +template +struct __hash_impl<_Tp, __enable_if_t::value && (sizeof(_Tp) > sizeof(size_t))> > + : __scalar_hash<_Tp> {}; -#if _LIBCPP_HAS_INT128 - -template <> -struct _LIBCPP_TEMPLATE_VIS hash<__int128_t> : public __scalar_hash<__int128_t> {}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash<__uint128_t> : public __scalar_hash<__uint128_t> {}; - -#endif - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash { - _LIBCPP_HIDE_FROM_ABI size_t operator()(float __v) const _NOEXCEPT { +template +struct __hash_impl<_Tp, __enable_if_t::value> > : __scalar_hash<_Tp> { + _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT { // -0.0 and 0.0 should return same hash if (__v == 0.0f) return 0; - return __scalar_hash::operator()(__v); + return __scalar_hash<_Tp>::operator()(__v); } }; template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash { - _LIBCPP_HIDE_FROM_ABI size_t operator()(double __v) const _NOEXCEPT { - // -0.0 and 0.0 should return same hash - if (__v == 0.0) - return 0; - return __scalar_hash::operator()(__v); - } -}; - -template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash { +struct __hash_impl : __scalar_hash { _LIBCPP_HIDE_FROM_ABI size_t operator()(long double __v) const _NOEXCEPT { // -0.0 and 0.0 should return same hash if (__v == 0.0L) @@ -492,27 +424,13 @@ struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash::value> -struct _LIBCPP_TEMPLATE_VIS 
__enum_hash : public __unary_function<_Tp, size_t> { - _LIBCPP_HIDE_FROM_ABI size_t operator()(_Tp __v) const _NOEXCEPT { - typedef typename underlying_type<_Tp>::type type; - return hash()(static_cast(__v)); - } -}; template -struct _LIBCPP_TEMPLATE_VIS __enum_hash<_Tp, false> { - __enum_hash() = delete; - __enum_hash(__enum_hash const&) = delete; - __enum_hash& operator=(__enum_hash const&) = delete; -}; - -template -struct _LIBCPP_TEMPLATE_VIS hash : public __enum_hash<_Tp> {}; +struct hash : public __hash_impl<_Tp> {}; #if _LIBCPP_STD_VER >= 17 template <> -struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { +struct hash : public __unary_function { _LIBCPP_HIDE_FROM_ABI size_t operator()(nullptr_t) const _NOEXCEPT { return 662607004ull; } }; #endif diff --git a/lib/libcxx/include/__functional/mem_fun_ref.h b/lib/libcxx/include/__functional/mem_fun_ref.h index c344420b02..68223772a1 100644 --- a/lib/libcxx/include/__functional/mem_fun_ref.h +++ b/lib/libcxx/include/__functional/mem_fun_ref.h @@ -23,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_t : public __unary_function<_Tp*, _Sp> { +class _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_t : public __unary_function<_Tp*, _Sp> { _Sp (_Tp::*__p_)(); public: @@ -32,7 +32,7 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_t : public __binary_function<_Tp*, _Ap, _Sp> { +class _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_t : public __binary_function<_Tp*, _Ap, _Sp> { _Sp (_Tp::*__p_)(_Ap); public: @@ -51,7 +51,7 @@ _LIBCPP_DEPRECATED_IN_CXX11 inline _LIBCPP_HIDE_FROM_ABI mem_fun1_t<_Sp, _Tp, _A } template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_ref_t : public __unary_function<_Tp, _Sp> { +class _LIBCPP_DEPRECATED_IN_CXX11 mem_fun_ref_t : public __unary_function<_Tp, _Sp> { _Sp (_Tp::*__p_)(); public: @@ -60,7 +60,7 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_ref_t : public __binary_function<_Tp, _Ap, _Sp> { +class _LIBCPP_DEPRECATED_IN_CXX11 mem_fun1_ref_t : public __binary_function<_Tp, _Ap, _Sp> { _Sp (_Tp::*__p_)(_Ap); public: @@ -80,7 +80,7 @@ mem_fun_ref(_Sp (_Tp::*__f)(_Ap)) { } template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_t : public __unary_function { +class _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_t : public __unary_function { _Sp (_Tp::*__p_)() const; public: @@ -89,8 +89,7 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS -_LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_t : public __binary_function { +class _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_t : public __binary_function { _Sp (_Tp::*__p_)(_Ap) const; public: @@ -110,7 +109,7 @@ mem_fun(_Sp (_Tp::*__f)(_Ap) const) { } template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_ref_t : public __unary_function<_Tp, _Sp> { +class _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun_ref_t : public __unary_function<_Tp, _Sp> { _Sp (_Tp::*__p_)() const; public: @@ -119,7 +118,7 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_ref_t : public __binary_function<_Tp, _Ap, _Sp> { +class _LIBCPP_DEPRECATED_IN_CXX11 const_mem_fun1_ref_t : public __binary_function<_Tp, _Ap, _Sp> { _Sp (_Tp::*__p_)(_Ap) const; public: diff --git a/lib/libcxx/include/__functional/operations.h b/lib/libcxx/include/__functional/operations.h index 67d9da289a..7b0ea11db5 100644 --- 
a/lib/libcxx/include/__functional/operations.h +++ b/lib/libcxx/include/__functional/operations.h @@ -13,6 +13,7 @@ #include <__config> #include <__functional/binary_function.h> #include <__functional/unary_function.h> +#include <__fwd/functional.h> #include <__type_traits/desugars_to.h> #include <__type_traits/is_integral.h> #include <__utility/forward.h> @@ -30,7 +31,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS plus : __binary_function<_Tp, _Tp, _Tp> { +struct plus : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x + __y; @@ -48,7 +49,7 @@ inline const bool __desugars_to_v<__plus_tag, plus, _Tp, _Up> = true; #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS plus { +struct plus { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) + std::forward<_T2>(__u))) // @@ -64,7 +65,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS minus : __binary_function<_Tp, _Tp, _Tp> { +struct minus : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x - __y; @@ -74,7 +75,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(minus); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS minus { +struct minus { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) - std::forward<_T2>(__u))) // @@ -90,7 +91,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS multiplies : __binary_function<_Tp, _Tp, _Tp> { +struct multiplies : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x * __y; @@ -100,7 +101,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(multiplies); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS multiplies { +struct multiplies { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) * std::forward<_T2>(__u))) // @@ -116,7 +117,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS divides : __binary_function<_Tp, _Tp, _Tp> { +struct divides : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x / __y; @@ -126,7 +127,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(divides); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS divides { +struct divides { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) / std::forward<_T2>(__u))) // @@ -142,7 +143,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS modulus : __binary_function<_Tp, _Tp, _Tp> { +struct modulus : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x % __y; @@ -152,7 +153,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(modulus); #if _LIBCPP_STD_VER >= 14 template <> 
-struct _LIBCPP_TEMPLATE_VIS modulus { +struct modulus { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) % std::forward<_T2>(__u))) // @@ -168,7 +169,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS negate : __unary_function<_Tp, _Tp> { +struct negate : __unary_function<_Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x) const { return -__x; } }; @@ -176,7 +177,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(negate); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS negate { +struct negate { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_Tp&& __x) const noexcept(noexcept(-std::forward<_Tp>(__x))) // @@ -194,7 +195,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS bit_and : __binary_function<_Tp, _Tp, _Tp> { +struct bit_and : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x & __y; @@ -204,7 +205,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(bit_and); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS bit_and { +struct bit_and { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) & @@ -217,13 +218,13 @@ struct _LIBCPP_TEMPLATE_VIS bit_and { #if _LIBCPP_STD_VER >= 14 template -struct _LIBCPP_TEMPLATE_VIS bit_not : __unary_function<_Tp, _Tp> { +struct bit_not : __unary_function<_Tp, _Tp> { _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x) const { return ~__x; } }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(bit_not); template <> -struct _LIBCPP_TEMPLATE_VIS bit_not { +struct bit_not { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_Tp&& __x) const noexcept(noexcept(~std::forward<_Tp>(__x))) // @@ -239,7 +240,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS bit_or : __binary_function<_Tp, _Tp, _Tp> { +struct bit_or : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x | __y; @@ -249,7 +250,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(bit_or); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS bit_or { +struct bit_or { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) | std::forward<_T2>(__u))) // @@ -265,7 +266,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS bit_xor : __binary_function<_Tp, _Tp, _Tp> { +struct bit_xor : __binary_function<_Tp, _Tp, _Tp> { typedef _Tp __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI _Tp operator()(const _Tp& __x, const _Tp& __y) const { return __x ^ __y; @@ -275,7 +276,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(bit_xor); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS bit_xor { +struct bit_xor { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) ^ std::forward<_T2>(__u))) // @@ -293,7 +294,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS equal_to : __binary_function<_Tp, _Tp, 
bool> { +struct equal_to : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x == __y; @@ -303,7 +304,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(equal_to); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS equal_to { +struct equal_to { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) == std::forward<_T2>(__u))) // @@ -328,7 +329,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS not_equal_to : __binary_function<_Tp, _Tp, bool> { +struct not_equal_to : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x != __y; @@ -338,7 +339,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(not_equal_to); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS not_equal_to { +struct not_equal_to { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) != std::forward<_T2>(__u))) // @@ -349,12 +350,8 @@ struct _LIBCPP_TEMPLATE_VIS not_equal_to { }; #endif -#if _LIBCPP_STD_VER >= 14 -template -#else template -#endif -struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> { +struct less : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x < __y; @@ -370,7 +367,7 @@ inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _T #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS less { +struct less { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) < std::forward<_T2>(__u))) // @@ -392,7 +389,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS less_equal : __binary_function<_Tp, _Tp, bool> { +struct less_equal : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x <= __y; @@ -402,7 +399,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less_equal); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS less_equal { +struct less_equal { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) <= std::forward<_T2>(__u))) // @@ -418,7 +415,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS greater_equal : __binary_function<_Tp, _Tp, bool> { +struct greater_equal : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x >= __y; @@ -428,7 +425,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(greater_equal); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS greater_equal { +struct greater_equal { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) >= @@ -444,7 +441,7 @@ template #else template #endif -struct 
_LIBCPP_TEMPLATE_VIS greater : __binary_function<_Tp, _Tp, bool> { +struct greater : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x > __y; @@ -457,7 +454,7 @@ inline const bool __desugars_to_v<__greater_tag, greater<_Tp>, _Tp, _Tp> = true; #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS greater { +struct greater { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) > std::forward<_T2>(__u))) // @@ -478,7 +475,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS logical_and : __binary_function<_Tp, _Tp, bool> { +struct logical_and : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x && __y; @@ -488,7 +485,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(logical_and); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS logical_and { +struct logical_and { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) && std::forward<_T2>(__u))) // @@ -504,7 +501,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS logical_not : __unary_function<_Tp, bool> { +struct logical_not : __unary_function<_Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x) const { return !__x; } }; @@ -512,7 +509,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(logical_not); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS logical_not { +struct logical_not { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_Tp&& __x) const noexcept(noexcept(!std::forward<_Tp>(__x))) // @@ -528,7 +525,7 @@ template #else template #endif -struct _LIBCPP_TEMPLATE_VIS logical_or : __binary_function<_Tp, _Tp, bool> { +struct logical_or : __binary_function<_Tp, _Tp, bool> { typedef bool __result_type; // used by valarray _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __x, const _Tp& __y) const { return __x || __y; @@ -538,7 +535,7 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(logical_or); #if _LIBCPP_STD_VER >= 14 template <> -struct _LIBCPP_TEMPLATE_VIS logical_or { +struct logical_or { template _LIBCPP_CONSTEXPR_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI auto operator()(_T1&& __t, _T2&& __u) const noexcept(noexcept(std::forward<_T1>(__t) || std::forward<_T2>(__u))) // diff --git a/lib/libcxx/include/__functional/pointer_to_binary_function.h b/lib/libcxx/include/__functional/pointer_to_binary_function.h index e345250dcd..eb0e7674ee 100644 --- a/lib/libcxx/include/__functional/pointer_to_binary_function.h +++ b/lib/libcxx/include/__functional/pointer_to_binary_function.h @@ -22,8 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) template -class _LIBCPP_TEMPLATE_VIS -_LIBCPP_DEPRECATED_IN_CXX11 pointer_to_binary_function : public __binary_function<_Arg1, _Arg2, _Result> { +class _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_binary_function : public __binary_function<_Arg1, _Arg2, _Result> { _Result (*__f_)(_Arg1, _Arg2); public: diff --git a/lib/libcxx/include/__functional/pointer_to_unary_function.h 
b/lib/libcxx/include/__functional/pointer_to_unary_function.h index 3a5d153d36..459d24abdd 100644 --- a/lib/libcxx/include/__functional/pointer_to_unary_function.h +++ b/lib/libcxx/include/__functional/pointer_to_unary_function.h @@ -22,8 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_BINDERS) template -class _LIBCPP_TEMPLATE_VIS -_LIBCPP_DEPRECATED_IN_CXX11 pointer_to_unary_function : public __unary_function<_Arg, _Result> { +class _LIBCPP_DEPRECATED_IN_CXX11 pointer_to_unary_function : public __unary_function<_Arg, _Result> { _Result (*__f_)(_Arg); public: diff --git a/lib/libcxx/include/__functional/reference_wrapper.h b/lib/libcxx/include/__functional/reference_wrapper.h index d6cd6428f2..148703b21d 100644 --- a/lib/libcxx/include/__functional/reference_wrapper.h +++ b/lib/libcxx/include/__functional/reference_wrapper.h @@ -11,13 +11,18 @@ #define _LIBCPP___FUNCTIONAL_REFERENCE_WRAPPER_H #include <__compare/synth_three_way.h> -#include <__concepts/boolean_testable.h> +#include <__concepts/convertible_to.h> #include <__config> #include <__functional/weak_result_type.h> #include <__memory/addressof.h> +#include <__type_traits/common_reference.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_const.h> +#include <__type_traits/is_core_convertible.h> +#include <__type_traits/is_same.h> +#include <__type_traits/is_specialization.h> #include <__type_traits/remove_cvref.h> #include <__type_traits/void_t.h> #include <__utility/declval.h> @@ -30,7 +35,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -class _LIBCPP_TEMPLATE_VIS reference_wrapper : public __weak_result_type<_Tp> { +class reference_wrapper : public __weak_result_type<_Tp> { public: // types typedef _Tp type; @@ -44,7 +49,7 @@ private: public: template ()))>, - __enable_if_t::value, int> = 0> + __enable_if_t, reference_wrapper>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference_wrapper(_Up&& __u) _NOEXCEPT_(noexcept(__fun(std::declval<_Up>()))) { type& __f = static_cast<_Up&&>(__u); @@ -74,7 +79,7 @@ public: _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(reference_wrapper __x, reference_wrapper __y) requires requires { - { __x.get() == __y.get() } -> __boolean_testable; + { __x.get() == __y.get() } -> __core_convertible_to; } { return __x.get() == __y.get(); @@ -82,7 +87,7 @@ public: _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(reference_wrapper __x, const _Tp& __y) requires requires { - { __x.get() == __y } -> __boolean_testable; + { __x.get() == __y } -> __core_convertible_to; } { return __x.get() == __y; @@ -90,7 +95,7 @@ public: _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(reference_wrapper __x, reference_wrapper __y) requires(!is_const_v<_Tp>) && requires { - { __x.get() == __y.get() } -> __boolean_testable; + { __x.get() == __y.get() } -> __core_convertible_to; } { return __x.get() == __y.get(); @@ -149,6 +154,37 @@ void ref(const _Tp&&) = delete; template void cref(const _Tp&&) = delete; +// Let desugars-to pass through std::reference_wrapper +template +inline const bool __desugars_to_v<_CanonicalTag, reference_wrapper<_Operation>, _Args...> = + __desugars_to_v<_CanonicalTag, _Operation, _Args...>; + +#if _LIBCPP_STD_VER >= 20 + +template +inline constexpr bool __is_ref_wrapper = __is_specialization_v<_Tp, reference_wrapper>; + +template +concept __ref_wrap_common_reference_exists_with = __is_ref_wrapper<_Rp> && requires { + 
typename common_reference_t; +} && convertible_to<_RpQual, common_reference_t>; + +template class _RpQual, template class _TpQual> + requires(__ref_wrap_common_reference_exists_with<_Rp, _Tp, _RpQual<_Rp>, _TpQual<_Tp>> && + !__ref_wrap_common_reference_exists_with<_Tp, _Rp, _TpQual<_Tp>, _RpQual<_Rp>>) +struct basic_common_reference<_Rp, _Tp, _RpQual, _TpQual> { + using type _LIBCPP_NODEBUG = common_reference_t>; +}; + +template class _TpQual, template class _RpQual> + requires(__ref_wrap_common_reference_exists_with<_Rp, _Tp, _RpQual<_Rp>, _TpQual<_Tp>> && + !__ref_wrap_common_reference_exists_with<_Tp, _Rp, _TpQual<_Tp>, _RpQual<_Rp>>) +struct basic_common_reference<_Tp, _Rp, _TpQual, _RpQual> { + using type _LIBCPP_NODEBUG = common_reference_t>; +}; + +#endif // _LIBCPP_STD_VER >= 20 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FUNCTIONAL_REFERENCE_WRAPPER_H diff --git a/lib/libcxx/include/__functional/unary_function.h b/lib/libcxx/include/__functional/unary_function.h index 769ffc9893..d6d5fae45b 100644 --- a/lib/libcxx/include/__functional/unary_function.h +++ b/lib/libcxx/include/__functional/unary_function.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION) template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 unary_function { +struct _LIBCPP_DEPRECATED_IN_CXX11 unary_function { typedef _Arg argument_type; typedef _Result result_type; }; @@ -36,11 +36,10 @@ struct __unary_function_keep_layout_base { }; #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION) -_LIBCPP_DIAGNOSTIC_PUSH -_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations") +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template using __unary_function _LIBCPP_NODEBUG = unary_function<_Arg, _Result>; -_LIBCPP_DIAGNOSTIC_POP +_LIBCPP_SUPPRESS_DEPRECATED_POP #else template using __unary_function _LIBCPP_NODEBUG = __unary_function_keep_layout_base<_Arg, _Result>; diff --git a/lib/libcxx/include/__functional/unary_negate.h b/lib/libcxx/include/__functional/unary_negate.h index 5bd487a97b..a93abc8e6b 100644 --- a/lib/libcxx/include/__functional/unary_negate.h +++ b/lib/libcxx/include/__functional/unary_negate.h @@ -22,8 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS) template -class _LIBCPP_TEMPLATE_VIS -_LIBCPP_DEPRECATED_IN_CXX17 unary_negate : public __unary_function { +class _LIBCPP_DEPRECATED_IN_CXX17 unary_negate : public __unary_function { _Predicate __pred_; public: diff --git a/lib/libcxx/include/__functional/weak_result_type.h b/lib/libcxx/include/__functional/weak_result_type.h index 233d86009a..aa462e4d5c 100644 --- a/lib/libcxx/include/__functional/weak_result_type.h +++ b/lib/libcxx/include/__functional/weak_result_type.h @@ -77,6 +77,7 @@ struct __maybe_derive_from_unary_function // bool is true template struct __maybe_derive_from_unary_function<_Tp, false> {}; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template ::value> struct __maybe_derive_from_binary_function // bool is true : public __derives_from_binary_function<_Tp>::type {}; @@ -99,6 +100,7 @@ struct __weak_result_type_imp<_Tp, false> template struct __weak_result_type : public __weak_result_type_imp<_Tp> {}; +_LIBCPP_SUPPRESS_DEPRECATED_POP // 0 argument case diff --git a/lib/libcxx/include/__fwd/array.h b/lib/libcxx/include/__fwd/array.h index 794779ae46..476de885c5 100644 --- a/lib/libcxx/include/__fwd/array.h +++ b/lib/libcxx/include/__fwd/array.h @@ -20,7 +20,7 @@ 
_LIBCPP_BEGIN_NAMESPACE_STD template -struct _LIBCPP_TEMPLATE_VIS array; +struct array; template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp& get(array<_Tp, _Size>&) _NOEXCEPT; diff --git a/lib/libcxx/include/__fwd/bit_reference.h b/lib/libcxx/include/__fwd/bit_reference.h index 30462b6ce4..36058d59cc 100644 --- a/lib/libcxx/include/__fwd/bit_reference.h +++ b/lib/libcxx/include/__fwd/bit_reference.h @@ -20,9 +20,25 @@ _LIBCPP_BEGIN_NAMESPACE_STD template class __bit_iterator; +template +struct __bit_array; + template struct __size_difference_type_traits; +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void +__fill_masked_range(_StoragePointer __word, unsigned __clz, unsigned __ctz, bool __fill_val); + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __trailing_mask(unsigned __clz); + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __leading_mask(unsigned __ctz); + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _StorageType __middle_mask(unsigned __clz, unsigned __ctz); + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FWD_BIT_REFERENCE_H diff --git a/lib/libcxx/include/__fwd/byte.h b/lib/libcxx/include/__fwd/byte.h index 0301833d93..6f2d6ae254 100644 --- a/lib/libcxx/include/__fwd/byte.h +++ b/lib/libcxx/include/__fwd/byte.h @@ -16,11 +16,11 @@ #endif #if _LIBCPP_STD_VER >= 17 -namespace std { // purposefully not versioned +_LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD enum class byte : unsigned char; -} // namespace std +_LIBCPP_END_UNVERSIONED_NAMESPACE_STD #endif // _LIBCPP_STD_VER >= 17 #endif // _LIBCPP___FWD_BYTE_H diff --git a/lib/libcxx/include/__fwd/complex.h b/lib/libcxx/include/__fwd/complex.h index 092d2e10b1..ff3c5c1e14 100644 --- a/lib/libcxx/include/__fwd/complex.h +++ b/lib/libcxx/include/__fwd/complex.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -class _LIBCPP_TEMPLATE_VIS complex; +class complex; #if _LIBCPP_STD_VER >= 26 diff --git a/lib/libcxx/include/__fwd/deque.h b/lib/libcxx/include/__fwd/deque.h index fd2fb5bb4b..35877a70e9 100644 --- a/lib/libcxx/include/__fwd/deque.h +++ b/lib/libcxx/include/__fwd/deque.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS deque; +class deque; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/format.h b/lib/libcxx/include/__fwd/format.h index 815e3e1922..b7f4cecb65 100644 --- a/lib/libcxx/include/__fwd/format.h +++ b/lib/libcxx/include/__fwd/format.h @@ -22,14 +22,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS basic_format_arg; +class basic_format_arg; template requires output_iterator<_OutIt, const _CharT&> -class _LIBCPP_TEMPLATE_VIS basic_format_context; +class basic_format_context; template -struct _LIBCPP_TEMPLATE_VIS formatter; +struct formatter; #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__fwd/fstream.h b/lib/libcxx/include/__fwd/fstream.h index e6c430dbf7..34a14dfa84 100644 --- a/lib/libcxx/include/__fwd/fstream.h +++ b/lib/libcxx/include/__fwd/fstream.h @@ -19,13 +19,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS basic_filebuf; +class basic_filebuf; template > -class _LIBCPP_TEMPLATE_VIS basic_ifstream; +class basic_ifstream; template > -class _LIBCPP_TEMPLATE_VIS basic_ofstream; +class basic_ofstream; template > -class _LIBCPP_TEMPLATE_VIS basic_fstream; +class basic_fstream; using filebuf = basic_filebuf; using ifstream = basic_ifstream; diff --git 
a/lib/libcxx/include/__fwd/functional.h b/lib/libcxx/include/__fwd/functional.h index 32c9ef33e4..a8c602417b 100644 --- a/lib/libcxx/include/__fwd/functional.h +++ b/lib/libcxx/include/__fwd/functional.h @@ -17,11 +17,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template -struct _LIBCPP_TEMPLATE_VIS hash; +#if _LIBCPP_STD_VER >= 14 +template +#else +template +#endif +struct less; template -class _LIBCPP_TEMPLATE_VIS reference_wrapper; +struct hash; + +template +class reference_wrapper; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/ios.h b/lib/libcxx/include/__fwd/ios.h index bb0c6eb49b..831624f4b1 100644 --- a/lib/libcxx/include/__fwd/ios.h +++ b/lib/libcxx/include/__fwd/ios.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD class _LIBCPP_EXPORTED_FROM_ABI ios_base; template > -class _LIBCPP_TEMPLATE_VIS basic_ios; +class basic_ios; using ios = basic_ios; #if _LIBCPP_HAS_WIDE_CHARACTERS diff --git a/lib/libcxx/include/__fwd/istream.h b/lib/libcxx/include/__fwd/istream.h index 66a6708544..91c21c1dab 100644 --- a/lib/libcxx/include/__fwd/istream.h +++ b/lib/libcxx/include/__fwd/istream.h @@ -19,10 +19,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS basic_istream; +class basic_istream; template > -class _LIBCPP_TEMPLATE_VIS basic_iostream; +class basic_iostream; using istream = basic_istream; using iostream = basic_iostream; diff --git a/lib/libcxx/include/__fwd/map.h b/lib/libcxx/include/__fwd/map.h new file mode 100644 index 0000000000..940298dd3e --- /dev/null +++ b/lib/libcxx/include/__fwd/map.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_MAP_H +#define _LIBCPP___FWD_MAP_H + +#include <__config> +#include <__fwd/functional.h> +#include <__fwd/memory.h> +#include <__fwd/pair.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , class _Allocator = allocator > > +class map; + +template , class _Allocator = allocator > > +class multimap; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_MAP_H diff --git a/lib/libcxx/include/__fwd/memory.h b/lib/libcxx/include/__fwd/memory.h index 564000997d..09ab5d861b 100644 --- a/lib/libcxx/include/__fwd/memory.h +++ b/lib/libcxx/include/__fwd/memory.h @@ -18,10 +18,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -class _LIBCPP_TEMPLATE_VIS allocator; +class allocator; template -class _LIBCPP_TEMPLATE_VIS shared_ptr; +class shared_ptr; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/memory_resource.h b/lib/libcxx/include/__fwd/memory_resource.h index ca9d377094..dc5a7ccb4a 100644 --- a/lib/libcxx/include/__fwd/memory_resource.h +++ b/lib/libcxx/include/__fwd/memory_resource.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace pmr { template -class _LIBCPP_AVAILABILITY_PMR _LIBCPP_TEMPLATE_VIS polymorphic_allocator; +class _LIBCPP_AVAILABILITY_PMR polymorphic_allocator; } // namespace pmr _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/ostream.h b/lib/libcxx/include/__fwd/ostream.h index ff5a3612ef..9ceb02dc28 100644 --- a/lib/libcxx/include/__fwd/ostream.h +++ b/lib/libcxx/include/__fwd/ostream.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS basic_ostream; +class basic_ostream; using ostream = basic_ostream; diff --git a/lib/libcxx/include/__fwd/pair.h b/lib/libcxx/include/__fwd/pair.h index b8ba2b7e92..cf07eabab6 100644 --- a/lib/libcxx/include/__fwd/pair.h +++ b/lib/libcxx/include/__fwd/pair.h @@ -20,7 +20,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -struct _LIBCPP_TEMPLATE_VIS pair; +struct pair; + +template +inline const bool __is_pair_v = false; + +template +inline const bool __is_pair_v > = true; template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename tuple_element<_Ip, pair<_T1, _T2> >::type& diff --git a/lib/libcxx/include/__fwd/queue.h b/lib/libcxx/include/__fwd/queue.h index 50d99ad9c2..d32730da13 100644 --- a/lib/libcxx/include/__fwd/queue.h +++ b/lib/libcxx/include/__fwd/queue.h @@ -21,10 +21,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS queue; +class queue; template , class _Compare = less > -class _LIBCPP_TEMPLATE_VIS priority_queue; +class priority_queue; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/set.h b/lib/libcxx/include/__fwd/set.h new file mode 100644 index 0000000000..d5ef8d56b0 --- /dev/null +++ b/lib/libcxx/include/__fwd/set.h @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_SET_H +#define _LIBCPP___FWD_SET_H + +#include <__config> +#include <__fwd/functional.h> +#include <__fwd/memory.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , class _Allocator = allocator<_Key> > +class set; + +template , class _Allocator = allocator<_Key> > +class multiset; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_SET_H diff --git a/lib/libcxx/include/__fwd/sstream.h b/lib/libcxx/include/__fwd/sstream.h index c176db6e5a..fe637d8624 100644 --- a/lib/libcxx/include/__fwd/sstream.h +++ b/lib/libcxx/include/__fwd/sstream.h @@ -20,14 +20,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD template , class _Allocator = allocator<_CharT> > -class _LIBCPP_TEMPLATE_VIS basic_stringbuf; +class basic_stringbuf; template , class _Allocator = allocator<_CharT> > -class _LIBCPP_TEMPLATE_VIS basic_istringstream; +class basic_istringstream; template , class _Allocator = allocator<_CharT> > -class _LIBCPP_TEMPLATE_VIS basic_ostringstream; +class basic_ostringstream; template , class _Allocator = allocator<_CharT> > -class _LIBCPP_TEMPLATE_VIS basic_stringstream; +class basic_stringstream; using stringbuf = basic_stringbuf; using istringstream = basic_istringstream; diff --git a/lib/libcxx/include/__fwd/stack.h b/lib/libcxx/include/__fwd/stack.h index 7dab6c1a4f..922f03e940 100644 --- a/lib/libcxx/include/__fwd/stack.h +++ b/lib/libcxx/include/__fwd/stack.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS stack; +class stack; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/streambuf.h b/lib/libcxx/include/__fwd/streambuf.h index aee0ebb3ce..d3d9d466fd 100644 --- a/lib/libcxx/include/__fwd/streambuf.h +++ b/lib/libcxx/include/__fwd/streambuf.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS basic_streambuf; +class basic_streambuf; using streambuf = basic_streambuf; diff --git a/lib/libcxx/include/__fwd/string.h b/lib/libcxx/include/__fwd/string.h index 89dec82d6f..254d0c874a 100644 --- a/lib/libcxx/include/__fwd/string.h +++ b/lib/libcxx/include/__fwd/string.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -struct _LIBCPP_TEMPLATE_VIS char_traits; +struct char_traits; template <> struct char_traits; @@ -40,7 +40,7 @@ struct char_traits; #endif template , class _Allocator = allocator<_CharT> > -class _LIBCPP_TEMPLATE_VIS basic_string; +class basic_string; using string = basic_string; diff --git a/lib/libcxx/include/__fwd/string_view.h b/lib/libcxx/include/__fwd/string_view.h index b848cb7f60..8da963d105 100644 --- a/lib/libcxx/include/__fwd/string_view.h +++ b/lib/libcxx/include/__fwd/string_view.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS basic_string_view; +class basic_string_view; typedef basic_string_view string_view; #if _LIBCPP_HAS_CHAR8_T diff --git a/lib/libcxx/include/__fwd/subrange.h b/lib/libcxx/include/__fwd/subrange.h index 5b3a07e553..81db94ab73 100644 --- a/lib/libcxx/include/__fwd/subrange.h +++ b/lib/libcxx/include/__fwd/subrange.h @@ -28,7 +28,7 @@ enum class subrange_kind : bool { unsized, sized }; template _Sent, subrange_kind _Kind> requires(_Kind == subrange_kind::sized || !sized_sentinel_for<_Sent, _Iter>) -class _LIBCPP_TEMPLATE_VIS subrange; +class subrange; template requires((_Index == 0 && 
copyable<_Iter>) || _Index == 1) diff --git a/lib/libcxx/include/__fwd/tuple.h b/lib/libcxx/include/__fwd/tuple.h index 2ed32bc0df..fb922b29f3 100644 --- a/lib/libcxx/include/__fwd/tuple.h +++ b/lib/libcxx/include/__fwd/tuple.h @@ -19,15 +19,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -struct _LIBCPP_TEMPLATE_VIS tuple_element; +struct tuple_element; #ifndef _LIBCPP_CXX03_LANG template -class _LIBCPP_TEMPLATE_VIS tuple; +class tuple; template -struct _LIBCPP_TEMPLATE_VIS tuple_size; +struct tuple_size; template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename tuple_element<_Ip, tuple<_Tp...> >::type& diff --git a/lib/libcxx/include/__fwd/variant.h b/lib/libcxx/include/__fwd/variant.h index 71c792f46a..fef7071b32 100644 --- a/lib/libcxx/include/__fwd/variant.h +++ b/lib/libcxx/include/__fwd/variant.h @@ -21,16 +21,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 template -class _LIBCPP_TEMPLATE_VIS variant; +class variant; template -struct _LIBCPP_TEMPLATE_VIS variant_size; +struct variant_size; template inline constexpr size_t variant_size_v = variant_size<_Tp>::value; template -struct _LIBCPP_TEMPLATE_VIS variant_alternative; +struct variant_alternative; template using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type; @@ -38,37 +38,28 @@ using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type; inline constexpr size_t variant_npos = static_cast(-1); template -_LIBCPP_HIDE_FROM_ABI -_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>& -get(variant<_Types...>&); +_LIBCPP_HIDE_FROM_ABI constexpr variant_alternative_t<_Ip, variant<_Types...>>& get(variant<_Types...>&); template -_LIBCPP_HIDE_FROM_ABI -_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>&& -get(variant<_Types...>&&); +_LIBCPP_HIDE_FROM_ABI constexpr variant_alternative_t<_Ip, variant<_Types...>>&& get(variant<_Types...>&&); template -_LIBCPP_HIDE_FROM_ABI -_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>& -get(const variant<_Types...>&); +_LIBCPP_HIDE_FROM_ABI constexpr const variant_alternative_t<_Ip, variant<_Types...>>& get(const variant<_Types...>&); template -_LIBCPP_HIDE_FROM_ABI -_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>&& -get(const variant<_Types...>&&); +_LIBCPP_HIDE_FROM_ABI constexpr const variant_alternative_t<_Ip, variant<_Types...>>&& get(const variant<_Types...>&&); template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp& get(variant<_Types...>&); +_LIBCPP_HIDE_FROM_ABI constexpr _Tp& get(variant<_Types...>&); template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp&& get(variant<_Types...>&&); +_LIBCPP_HIDE_FROM_ABI constexpr _Tp&& get(variant<_Types...>&&); template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp& get(const variant<_Types...>&); +_LIBCPP_HIDE_FROM_ABI constexpr const _Tp& get(const variant<_Types...>&); template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp&& -get(const variant<_Types...>&&); +_LIBCPP_HIDE_FROM_ABI constexpr const _Tp&& get(const variant<_Types...>&&); #endif // _LIBCPP_STD_VER >= 17 diff --git a/lib/libcxx/include/__fwd/vector.h b/lib/libcxx/include/__fwd/vector.h index 6980e40ec9..31084ec995 100644 --- a/lib/libcxx/include/__fwd/vector.h 
+++ b/lib/libcxx/include/__fwd/vector.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > -class _LIBCPP_TEMPLATE_VIS vector; +class vector; template class vector; diff --git a/lib/libcxx/include/__hash_table b/lib/libcxx/include/__hash_table index 9a82ec51da..78f2f3bfd2 100644 --- a/lib/libcxx/include/__hash_table +++ b/lib/libcxx/include/__hash_table @@ -29,6 +29,7 @@ #include <__memory/unique_ptr.h> #include <__new/launder.h> #include <__type_traits/can_extract_key.h> +#include <__type_traits/copy_cvref.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_const.h> @@ -108,9 +109,22 @@ struct __hash_node_base { _LIBCPP_HIDE_FROM_ABI explicit __hash_node_base(__next_pointer __next) _NOEXCEPT : __next_(__next) {} }; +template +struct __get_hash_node_value_type { + using type _LIBCPP_NODEBUG = _Tp; +}; + +template +struct __get_hash_node_value_type<__hash_value_type<_Key, _Tp> > { + using type _LIBCPP_NODEBUG = pair; +}; + +template +using __get_hash_node_value_type_t _LIBCPP_NODEBUG = typename __get_hash_node_value_type<_Tp>::type; + template struct __hash_node : public __hash_node_base< __rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > > { - typedef _Tp __node_value_type; + using __node_value_type _LIBCPP_NODEBUG = __get_hash_node_value_type_t<_Tp>; using _Base _LIBCPP_NODEBUG = __hash_node_base<__rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > >; using __next_pointer _LIBCPP_NODEBUG = typename _Base::__next_pointer; @@ -122,18 +136,20 @@ struct __hash_node : public __hash_node_base< __rebind_pointer_t<_VoidPtr, __has private: union { - _Tp __value_; + __node_value_type __value_; }; public: - _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } + _LIBCPP_HIDE_FROM_ABI __node_value_type& __get_value() { return __value_; } #else private: - _ALIGNAS_TYPE(_Tp) char __buffer_[sizeof(_Tp)]; + _ALIGNAS_TYPE(__node_value_type) char __buffer_[sizeof(__node_value_type)]; public: - _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return *std::__launder(reinterpret_cast<_Tp*>(&__buffer_)); } + _LIBCPP_HIDE_FROM_ABI __node_value_type& __get_value() { + return *std::__launder(reinterpret_cast<__node_value_type*>(&__buffer_)); + } #endif _LIBCPP_HIDE_FROM_ABI explicit __hash_node(__next_pointer __next, size_t __hash) : _Base(__next), __hash_(__hash) {} @@ -147,24 +163,24 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) { } inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) { - return __n < 2 ? __n : (size_t(1) << (numeric_limits::digits - __libcpp_clz(__n - 1))); + return __n < 2 ? 
__n : (size_t(1) << (numeric_limits::digits - std::__countl_zero(__n - 1))); } template class __hash_table; template -class _LIBCPP_TEMPLATE_VIS __hash_iterator; +class __hash_iterator; template -class _LIBCPP_TEMPLATE_VIS __hash_const_iterator; +class __hash_const_iterator; template -class _LIBCPP_TEMPLATE_VIS __hash_local_iterator; +class __hash_local_iterator; template -class _LIBCPP_TEMPLATE_VIS __hash_const_local_iterator; +class __hash_const_local_iterator; template -class _LIBCPP_TEMPLATE_VIS __hash_map_iterator; +class __hash_map_iterator; template -class _LIBCPP_TEMPLATE_VIS __hash_map_const_iterator; +class __hash_map_const_iterator; template struct __hash_key_value_types { @@ -191,18 +207,18 @@ struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > { _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(__container_value_type const& __v) { return __v.first; } - template ::value, int> = 0> + template , __node_value_type>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(_Up& __t) { return __t.__get_value(); } - template ::value, int> = 0> + template , __container_value_type>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(_Up& __t) { return __t; } - _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__node_value_type& __n) { - return std::addressof(__n.__get_value()); + _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__container_value_type& __n) { + return std::addressof(__n); } _LIBCPP_HIDE_FROM_ABI static pair __move(__node_value_type& __v) { return __v.__move(); } }; @@ -242,7 +258,7 @@ public: typedef typename __node_base_type::__next_pointer __next_pointer; - typedef _Tp __node_value_type; + using __node_value_type _LIBCPP_NODEBUG = __get_hash_node_value_type_t<_Tp>; typedef __rebind_pointer_t<_VoidPtr, __node_value_type> __node_value_type_pointer; typedef __rebind_pointer_t<_VoidPtr, const __node_value_type> __const_node_value_type_pointer; @@ -273,7 +289,7 @@ struct __make_hash_node_types { }; template -class _LIBCPP_TEMPLATE_VIS __hash_iterator { +class __hash_iterator { typedef __hash_node_types<_NodePtr> _NodeTypes; typedef _NodePtr __node_pointer; typedef typename _NodeTypes::__next_pointer __next_pointer; @@ -327,17 +343,17 @@ private: template friend class __hash_table; template - friend class _LIBCPP_TEMPLATE_VIS __hash_const_iterator; + friend class __hash_const_iterator; template - friend class _LIBCPP_TEMPLATE_VIS __hash_map_iterator; + friend class __hash_map_iterator; template - friend class _LIBCPP_TEMPLATE_VIS unordered_map; + friend class unordered_map; template - friend class _LIBCPP_TEMPLATE_VIS unordered_multimap; + friend class unordered_multimap; }; template -class _LIBCPP_TEMPLATE_VIS __hash_const_iterator { +class __hash_const_iterator { static_assert(!is_const::element_type>::value, ""); typedef __hash_node_types<_NodePtr> _NodeTypes; typedef _NodePtr __node_pointer; @@ -395,15 +411,15 @@ private: template friend class __hash_table; template - friend class _LIBCPP_TEMPLATE_VIS __hash_map_const_iterator; + friend class __hash_map_const_iterator; template - friend class _LIBCPP_TEMPLATE_VIS unordered_map; + friend class unordered_map; template - friend class _LIBCPP_TEMPLATE_VIS unordered_multimap; + friend class unordered_multimap; }; template -class _LIBCPP_TEMPLATE_VIS __hash_local_iterator { +class __hash_local_iterator { typedef __hash_node_types<_NodePtr> _NodeTypes; typedef _NodePtr __node_pointer; typedef typename 
_NodeTypes::__next_pointer __next_pointer; @@ -468,13 +484,13 @@ private: template friend class __hash_table; template - friend class _LIBCPP_TEMPLATE_VIS __hash_const_local_iterator; + friend class __hash_const_local_iterator; template - friend class _LIBCPP_TEMPLATE_VIS __hash_map_iterator; + friend class __hash_map_iterator; }; template -class _LIBCPP_TEMPLATE_VIS __hash_const_local_iterator { +class __hash_const_local_iterator { typedef __hash_node_types<_ConstNodePtr> _NodeTypes; typedef _ConstNodePtr __node_pointer; typedef typename _NodeTypes::__next_pointer __next_pointer; @@ -553,7 +569,7 @@ private: template friend class __hash_table; template - friend class _LIBCPP_TEMPLATE_VIS __hash_map_const_iterator; + friend class __hash_map_const_iterator; }; template @@ -667,14 +683,14 @@ int __diagnose_unordered_container_requirements(void*); template class __hash_table { public: - typedef _Tp value_type; + using value_type = __get_hash_node_value_type_t<_Tp>; typedef _Hash hasher; typedef _Equal key_equal; typedef _Alloc allocator_type; private: typedef allocator_traits __alloc_traits; - typedef typename __make_hash_node_types::type _NodeTypes; + typedef typename __make_hash_node_types<_Tp, typename __alloc_traits::void_pointer>::type _NodeTypes; public: typedef typename _NodeTypes::__node_value_type __node_value_type; @@ -770,9 +786,10 @@ public: _LIBCPP_HIDE_FROM_ABI __hash_table& operator=(const __hash_table& __u); _LIBCPP_HIDE_FROM_ABI __hash_table& operator=(__hash_table&& __u) - _NOEXCEPT_(__node_traits::propagate_on_container_move_assignment::value&& - is_nothrow_move_assignable<__node_allocator>::value&& is_nothrow_move_assignable::value&& - is_nothrow_move_assignable::value); + _NOEXCEPT_(is_nothrow_move_assignable::value&& is_nothrow_move_assignable::value && + ((__node_traits::propagate_on_container_move_assignment::value && + is_nothrow_move_assignable<__node_allocator>::value) || + allocator_traits<__node_allocator>::is_always_equal::value)); template _LIBCPP_HIDE_FROM_ABI void __assign_unique(_InputIterator __first, _InputIterator __last); template @@ -835,27 +852,36 @@ public: template _LIBCPP_HIDE_FROM_ABI iterator __emplace_hint_multi(const_iterator __p, _Args&&... 
__args); - _LIBCPP_HIDE_FROM_ABI pair __insert_unique(__container_value_type&& __x) { - return __emplace_unique_key_args(_NodeTypes::__get_key(__x), std::move(__x)); + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __insert_unique_from_orphaned_node(value_type&& __value) { + using __key_type = typename _NodeTypes::key_type; + + __node_holder __h = __construct_node(const_cast<__key_type&&>(__value.first), std::move(__value.second)); + __node_insert_unique(__h.get()); + __h.release(); } - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI pair __insert_unique(_Pp&& __x) { - return __emplace_unique(std::forward<_Pp>(__x)); + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __insert_unique_from_orphaned_node(value_type&& __value) { + __node_holder __h = __construct_node(std::move(__value)); + __node_insert_unique(__h.get()); + __h.release(); } - template - _LIBCPP_HIDE_FROM_ABI iterator __insert_multi(_Pp&& __x) { - return __emplace_multi(std::forward<_Pp>(__x)); + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(value_type&& __value) { + using __key_type = typename _NodeTypes::key_type; + + __node_holder __h = __construct_node(const_cast<__key_type&&>(__value.first), std::move(__value.second)); + __node_insert_multi(__h.get()); + __h.release(); } - template - _LIBCPP_HIDE_FROM_ABI iterator __insert_multi(const_iterator __p, _Pp&& __x) { - return __emplace_hint_multi(__p, std::forward<_Pp>(__x)); - } - - _LIBCPP_HIDE_FROM_ABI pair __insert_unique(const __container_value_type& __x) { - return __emplace_unique_key_args(_NodeTypes::__get_key(__x), __x); + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(value_type&& __value) { + __node_holder __h = __construct_node(std::move(__value)); + __node_insert_multi(__h.get()); + __h.release(); } #if _LIBCPP_STD_VER >= 17 @@ -1019,10 +1045,25 @@ private: _LIBCPP_HIDE_FROM_ABI void __deallocate_node(__next_pointer __np) _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI __next_pointer __detach() _NOEXCEPT; + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __assign_value(__get_hash_node_value_type_t<_Tp>& __lhs, _From&& __rhs) { + using __key_type = typename _NodeTypes::key_type; + + // This is technically UB, since the object was constructed as `const`. + // Clang doesn't optimize on this currently though. 
+ const_cast<__key_type&>(__lhs.first) = const_cast<__copy_cvref_t<_From, __key_type>&&>(__rhs.first); + __lhs.second = std::forward<_From>(__rhs).second; + } + + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI void __assign_value(_Tp& __lhs, _From&& __rhs) { + __lhs = std::forward<_From>(__rhs); + } + template - friend class _LIBCPP_TEMPLATE_VIS unordered_map; + friend class unordered_map; template - friend class _LIBCPP_TEMPLATE_VIS unordered_multimap; + friend class unordered_multimap; }; template @@ -1215,8 +1256,8 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u, #endif // _LIBCPP_HAS_EXCEPTIONS const_iterator __i = __u.begin(); while (__cache != nullptr && __u.size() != 0) { - __cache->__upcast()->__get_value() = std::move(__u.remove(__i++)->__get_value()); - __next_pointer __next = __cache->__next_; + __assign_value(__cache->__upcast()->__get_value(), std::move(__u.remove(__i++)->__get_value())); + __next_pointer __next = __cache->__next_; __node_insert_multi(__cache->__upcast()); __cache = __next; } @@ -1229,19 +1270,17 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u, __deallocate_node(__cache); } const_iterator __i = __u.begin(); - while (__u.size() != 0) { - __node_holder __h = __construct_node(_NodeTypes::__move(__u.remove(__i++)->__get_value())); - __node_insert_multi(__h.get()); - __h.release(); - } + while (__u.size() != 0) + __insert_multi_from_orphaned_node(std::move(__u.remove(__i++)->__get_value())); } } template -inline __hash_table<_Tp, _Hash, _Equal, _Alloc>& -__hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(__hash_table&& __u) _NOEXCEPT_( - __node_traits::propagate_on_container_move_assignment::value&& is_nothrow_move_assignable<__node_allocator>::value&& - is_nothrow_move_assignable::value&& is_nothrow_move_assignable::value) { +inline __hash_table<_Tp, _Hash, _Equal, _Alloc>& __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(__hash_table&& __u) + _NOEXCEPT_(is_nothrow_move_assignable::value&& is_nothrow_move_assignable::value && + ((__node_traits::propagate_on_container_move_assignment::value && + is_nothrow_move_assignable<__node_allocator>::value) || + allocator_traits<__node_allocator>::is_always_equal::value)) { __move_assign(__u, integral_constant()); return *this; } @@ -1260,8 +1299,8 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __ try { #endif // _LIBCPP_HAS_EXCEPTIONS for (; __cache != nullptr && __first != __last; ++__first) { - __cache->__upcast()->__get_value() = *__first; - __next_pointer __next = __cache->__next_; + __assign_value(__cache->__upcast()->__get_value(), *__first); + __next_pointer __next = __cache->__next_; __node_insert_unique(__cache->__upcast()); __cache = __next; } @@ -1274,7 +1313,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __ __deallocate_node(__cache); } for (; __first != __last; ++__first) - __insert_unique(*__first); + __emplace_unique(*__first); } template @@ -1292,7 +1331,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f try { #endif // _LIBCPP_HAS_EXCEPTIONS for (; __cache != nullptr && __first != __last; ++__first) { - __cache->__upcast()->__get_value() = *__first; + __assign_value(__cache->__upcast()->__get_value(), *__first); __next_pointer __next = __cache->__next_; __node_insert_multi(__cache->__upcast()); __cache = __next; @@ -1306,7 +1345,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f 
__deallocate_node(__cache); } for (; __first != __last; ++__first) - __insert_multi(_NodeTypes::__get_value(*__first)); + __emplace_multi(_NodeTypes::__get_value(*__first)); } template @@ -1769,9 +1808,9 @@ template template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) { - size_t __hash = hash_function()(__k); size_type __bc = bucket_count(); - if (__bc != 0) { + if (__bc != 0 && size() != 0) { + size_t __hash = hash_function()(__k); size_t __chash = std::__constrain_hash(__hash, __bc); __next_pointer __nd = __bucket_list_[__chash]; if (__nd != nullptr) { @@ -1790,9 +1829,9 @@ template template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const { - size_t __hash = hash_function()(__k); size_type __bc = bucket_count(); - if (__bc != 0) { + if (__bc != 0 && size() != 0) { + size_t __hash = hash_function()(__k); size_t __chash = std::__constrain_hash(__hash, __bc); __next_pointer __nd = __bucket_list_[__chash]; if (__nd != nullptr) { diff --git a/lib/libcxx/include/__ios/fpos.h b/lib/libcxx/include/__ios/fpos.h index 1af1e23ee5..e5c21b4391 100644 --- a/lib/libcxx/include/__ios/fpos.h +++ b/lib/libcxx/include/__ios/fpos.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -class _LIBCPP_TEMPLATE_VIS fpos { +class fpos { private: _StateT __st_; streamoff __off_; diff --git a/lib/libcxx/include/__iterator/advance.h b/lib/libcxx/include/__iterator/advance.h index 57b1b845f1..c7d3c1f0e8 100644 --- a/lib/libcxx/include/__iterator/advance.h +++ b/lib/libcxx/include/__iterator/advance.h @@ -65,9 +65,8 @@ template < class _InputIter, _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 void advance(_InputIter& __i, _Distance __orig_n) { typedef typename iterator_traits<_InputIter>::difference_type _Difference; _Difference __n = static_cast<_Difference>(std::__convert_to_integral(__orig_n)); - // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. - _LIBCPP_ASSERT_PEDANTIC(__n >= 0 || __has_bidirectional_iterator_category<_InputIter>::value, - "Attempt to advance(it, n) with negative n on a non-bidirectional iterator"); + _LIBCPP_ASSERT_PEDANTIC(__has_bidirectional_iterator_category<_InputIter>::value || __n >= 0, + "std::advance: Can only pass a negative `n` with a bidirectional_iterator."); std::__advance(__i, __n, typename iterator_traits<_InputIter>::iterator_category()); } @@ -98,9 +97,8 @@ public: // Preconditions: If `I` does not model `bidirectional_iterator`, `n` is not negative. template _LIBCPP_HIDE_FROM_ABI constexpr void operator()(_Ip& __i, iter_difference_t<_Ip> __n) const { - // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. - _LIBCPP_ASSERT_PEDANTIC( - __n >= 0 || bidirectional_iterator<_Ip>, "If `n < 0`, then `bidirectional_iterator` must be true."); + _LIBCPP_ASSERT_PEDANTIC(bidirectional_iterator<_Ip> || __n >= 0, + "ranges::advance: Can only pass a negative `n` with a bidirectional_iterator."); // If `I` models `random_access_iterator`, equivalent to `i += n`. 
if constexpr (random_access_iterator<_Ip>) { @@ -149,9 +147,9 @@ public: template _Sp> _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip& __i, iter_difference_t<_Ip> __n, _Sp __bound_sentinel) const { - // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. - _LIBCPP_ASSERT_PEDANTIC((__n >= 0) || (bidirectional_iterator<_Ip> && same_as<_Ip, _Sp>), - "If `n < 0`, then `bidirectional_iterator && same_as` must be true."); + _LIBCPP_ASSERT_PEDANTIC( + (bidirectional_iterator<_Ip> && same_as<_Ip, _Sp>) || (__n >= 0), + "ranges::advance: Can only pass a negative `n` with a bidirectional_iterator coming from a common_range."); // If `S` and `I` model `sized_sentinel_for`: if constexpr (sized_sentinel_for<_Sp, _Ip>) { // If |n| >= |bound_sentinel - i|, equivalent to `ranges::advance(i, bound_sentinel)`. diff --git a/lib/libcxx/include/__iterator/aliasing_iterator.h b/lib/libcxx/include/__iterator/aliasing_iterator.h index e01127142a..528e95eb14 100644 --- a/lib/libcxx/include/__iterator/aliasing_iterator.h +++ b/lib/libcxx/include/__iterator/aliasing_iterator.h @@ -12,8 +12,10 @@ #include <__config> #include <__cstddef/ptrdiff_t.h> #include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> #include <__memory/pointer_traits.h> -#include <__type_traits/is_trivial.h> +#include <__type_traits/is_trivially_constructible.h> +#include <__type_traits/is_trivially_copyable.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -44,7 +46,8 @@ struct __aliasing_iterator_wrapper { using reference = value_type&; using pointer = value_type*; - static_assert(is_trivial::value); + static_assert(is_trivially_default_constructible::value); + static_assert(is_trivially_copyable::value); static_assert(sizeof(__base_value_type) == sizeof(value_type)); _LIBCPP_HIDE_FROM_ABI __iterator() = default; @@ -102,7 +105,7 @@ struct __aliasing_iterator_wrapper { _LIBCPP_HIDE_FROM_ABI _Alias operator*() const _NOEXCEPT { _Alias __val; - __builtin_memcpy(&__val, std::__to_address(__base_), sizeof(value_type)); + __builtin_memcpy(std::addressof(__val), std::__to_address(__base_), sizeof(value_type)); return __val; } diff --git a/lib/libcxx/include/__iterator/back_insert_iterator.h b/lib/libcxx/include/__iterator/back_insert_iterator.h index 9a59487533..3a11fae4cb 100644 --- a/lib/libcxx/include/__iterator/back_insert_iterator.h +++ b/lib/libcxx/include/__iterator/back_insert_iterator.h @@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_SUPPRESS_DEPRECATED_PUSH template -class _LIBCPP_TEMPLATE_VIS back_insert_iterator +class back_insert_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator #endif diff --git a/lib/libcxx/include/__iterator/common_iterator.h b/lib/libcxx/include/__iterator/common_iterator.h index 31fc8267e5..a59063d245 100644 --- a/lib/libcxx/include/__iterator/common_iterator.h +++ b/lib/libcxx/include/__iterator/common_iterator.h @@ -28,6 +28,7 @@ #include <__memory/addressof.h> #include <__type_traits/conditional.h> #include <__type_traits/is_pointer.h> +#include <__type_traits/is_referenceable.h> #include <__utility/declval.h> #include @@ -157,7 +158,7 @@ public: ++*this; return __tmp; } else if constexpr (requires(_Iter& __i) { - { *__i++ } -> __can_reference; + { *__i++ } -> __referenceable; } || !__can_use_postfix_proxy<_Iter>) { return std::__unchecked_get<_Iter>(__hold_)++; } else { @@ -272,13 +273,13 @@ concept __common_iter_has_ptr_op = 
requires(const common_iterator<_Iter, _Sent>& template struct __arrow_type_or_void { - using type = void; + using type _LIBCPP_NODEBUG = void; }; template requires __common_iter_has_ptr_op<_Iter, _Sent> struct __arrow_type_or_void<_Iter, _Sent> { - using type = decltype(std::declval&>().operator->()); + using type _LIBCPP_NODEBUG = decltype(std::declval&>().operator->()); }; template diff --git a/lib/libcxx/include/__iterator/concepts.h b/lib/libcxx/include/__iterator/concepts.h index 6e5ac1d3af..20a1ab4691 100644 --- a/lib/libcxx/include/__iterator/concepts.h +++ b/lib/libcxx/include/__iterator/concepts.h @@ -29,15 +29,19 @@ #include <__iterator/incrementable_traits.h> #include <__iterator/iter_move.h> #include <__iterator/iterator_traits.h> -#include <__iterator/readable_traits.h> #include <__memory/pointer_traits.h> #include <__type_traits/add_pointer.h> #include <__type_traits/common_reference.h> +#include <__type_traits/conditional.h> +#include <__type_traits/disjunction.h> +#include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> #include <__type_traits/invoke.h> #include <__type_traits/is_pointer.h> #include <__type_traits/is_primary_template.h> #include <__type_traits/is_reference.h> +#include <__type_traits/is_referenceable.h> +#include <__type_traits/is_valid_expansion.h> #include <__type_traits/remove_cv.h> #include <__type_traits/remove_cvref.h> #include <__utility/forward.h> @@ -80,12 +84,13 @@ concept __specialization_of_projected = requires { template struct __indirect_value_t_impl { - using type = iter_value_t<_Tp>&; + using type _LIBCPP_NODEBUG = iter_value_t<_Tp>&; }; template <__specialization_of_projected _Tp> struct __indirect_value_t_impl<_Tp> { - using type = invoke_result_t<__projected_projection_t<_Tp>&, - typename __indirect_value_t_impl<__projected_iterator_t<_Tp>>::type>; + using type _LIBCPP_NODEBUG = + invoke_result_t<__projected_projection_t<_Tp>&, + typename __indirect_value_t_impl<__projected_iterator_t<_Tp>>::type>; }; template @@ -131,7 +136,7 @@ concept incrementable = regular<_Ip> && weakly_incrementable<_Ip> && requires(_I // [iterator.concept.iterator] template concept input_or_output_iterator = requires(_Ip __i) { - { *__i } -> __can_reference; + { *__i } -> __referenceable; } && weakly_incrementable<_Ip>; // [iterator.concept.sentinel] @@ -149,6 +154,42 @@ concept sized_sentinel_for = { __i - __s } -> same_as>; }; +template +struct __iter_traits_cache { + using type _LIBCPP_NODEBUG = + _If<__is_primary_template >::value, _Iter, iterator_traits<_Iter> >; +}; +template +using _ITER_TRAITS _LIBCPP_NODEBUG = typename __iter_traits_cache<_Iter>::type; + +struct __iter_concept_concept_test { + template + using _Apply _LIBCPP_NODEBUG = typename _ITER_TRAITS<_Iter>::iterator_concept; +}; +struct __iter_concept_category_test { + template + using _Apply _LIBCPP_NODEBUG = typename _ITER_TRAITS<_Iter>::iterator_category; +}; +struct __iter_concept_random_fallback { + template + using _Apply _LIBCPP_NODEBUG = + __enable_if_t<__is_primary_template >::value, random_access_iterator_tag>; +}; + +template +struct __test_iter_concept : _IsValidExpansion<_Tester::template _Apply, _Iter>, _Tester {}; + +template +struct __iter_concept_cache { + using type _LIBCPP_NODEBUG = + _Or<__test_iter_concept<_Iter, __iter_concept_concept_test>, + __test_iter_concept<_Iter, __iter_concept_category_test>, + __test_iter_concept<_Iter, __iter_concept_random_fallback> >; +}; + +template +using _ITER_CONCEPT _LIBCPP_NODEBUG = typename 
__iter_concept_cache<_Iter>::type::template _Apply<_Iter>; + // [iterator.concept.input] template concept input_iterator = input_or_output_iterator<_Ip> && indirectly_readable<_Ip> && requires { diff --git a/lib/libcxx/include/__iterator/front_insert_iterator.h b/lib/libcxx/include/__iterator/front_insert_iterator.h index 80819cd22a..d79c4d78b6 100644 --- a/lib/libcxx/include/__iterator/front_insert_iterator.h +++ b/lib/libcxx/include/__iterator/front_insert_iterator.h @@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_SUPPRESS_DEPRECATED_PUSH template -class _LIBCPP_TEMPLATE_VIS front_insert_iterator +class front_insert_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator #endif diff --git a/lib/libcxx/include/__iterator/insert_iterator.h b/lib/libcxx/include/__iterator/insert_iterator.h index e0ee0ce035..95768cb8e0 100644 --- a/lib/libcxx/include/__iterator/insert_iterator.h +++ b/lib/libcxx/include/__iterator/insert_iterator.h @@ -37,7 +37,7 @@ using __insert_iterator_iter_t _LIBCPP_NODEBUG = typename _Container::iterator; _LIBCPP_SUPPRESS_DEPRECATED_PUSH template -class _LIBCPP_TEMPLATE_VIS insert_iterator +class insert_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator #endif diff --git a/lib/libcxx/include/__iterator/istream_iterator.h b/lib/libcxx/include/__iterator/istream_iterator.h index a6c74d0017..cdb8056cfe 100644 --- a/lib/libcxx/include/__iterator/istream_iterator.h +++ b/lib/libcxx/include/__iterator/istream_iterator.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_SUPPRESS_DEPRECATED_PUSH template , class _Distance = ptrdiff_t> -class _LIBCPP_TEMPLATE_VIS istream_iterator +class istream_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator #endif @@ -58,6 +58,9 @@ public: __in_stream_ = nullptr; } + // LWG3600 Changed the wording of the copy constructor. In libc++ this constructor + // can still be trivial after this change. 
+ _LIBCPP_HIDE_FROM_ABI const _Tp& operator*() const { return __value_; } _LIBCPP_HIDE_FROM_ABI const _Tp* operator->() const { return std::addressof((operator*())); } _LIBCPP_HIDE_FROM_ABI istream_iterator& operator++() { diff --git a/lib/libcxx/include/__iterator/istreambuf_iterator.h b/lib/libcxx/include/__iterator/istreambuf_iterator.h index 162873b955..b7b28cd1a0 100644 --- a/lib/libcxx/include/__iterator/istreambuf_iterator.h +++ b/lib/libcxx/include/__iterator/istreambuf_iterator.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_SUPPRESS_DEPRECATED_PUSH template -class _LIBCPP_TEMPLATE_VIS istreambuf_iterator +class istreambuf_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator #endif diff --git a/lib/libcxx/include/__iterator/iter_move.h b/lib/libcxx/include/__iterator/iter_move.h index ba8aed3c0f..5cc1615259 100644 --- a/lib/libcxx/include/__iterator/iter_move.h +++ b/lib/libcxx/include/__iterator/iter_move.h @@ -14,6 +14,7 @@ #include <__config> #include <__iterator/iterator_traits.h> #include <__type_traits/is_reference.h> +#include <__type_traits/is_referenceable.h> #include <__type_traits/remove_cvref.h> #include <__utility/declval.h> #include <__utility/forward.h> @@ -90,7 +91,7 @@ inline constexpr auto iter_move = __iter_move::__fn{}; template <__dereferenceable _Tp> requires requires(_Tp& __t) { - { ranges::iter_move(__t) } -> __can_reference; + { ranges::iter_move(__t) } -> __referenceable; } using iter_rvalue_reference_t = decltype(ranges::iter_move(std::declval<_Tp&>())); diff --git a/lib/libcxx/include/__iterator/iterator.h b/lib/libcxx/include/__iterator/iterator.h index 1591655313..d7fcd8c4dd 100644 --- a/lib/libcxx/include/__iterator/iterator.h +++ b/lib/libcxx/include/__iterator/iterator.h @@ -20,7 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 iterator { +struct _LIBCPP_DEPRECATED_IN_CXX17 iterator { typedef _Tp value_type; typedef _Distance difference_type; typedef _Pointer pointer; diff --git a/lib/libcxx/include/__iterator/iterator_traits.h b/lib/libcxx/include/__iterator/iterator_traits.h index db68dd2c37..f727e8ff36 100644 --- a/lib/libcxx/include/__iterator/iterator_traits.h +++ b/lib/libcxx/include/__iterator/iterator_traits.h @@ -22,16 +22,18 @@ #include <__fwd/pair.h> #include <__iterator/incrementable_traits.h> #include <__iterator/readable_traits.h> +#include <__tuple/tuple_element.h> #include <__type_traits/common_reference.h> #include <__type_traits/conditional.h> +#include <__type_traits/detected_or.h> #include <__type_traits/disjunction.h> -#include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_object.h> #include <__type_traits/is_primary_template.h> #include <__type_traits/is_reference.h> -#include <__type_traits/is_valid_expansion.h> +#include <__type_traits/is_referenceable.h> +#include <__type_traits/nat.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_cv.h> #include <__type_traits/remove_cvref.h> @@ -46,15 +48,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 -template -using __with_reference _LIBCPP_NODEBUG = _Tp&; - -template -concept __can_reference = requires { typename __with_reference<_Tp>; }; - template concept __dereferenceable = requires(_Tp& __t) { - { *__t } -> __can_reference; // not required to be equality-preserving + { *__t } -> __referenceable; // not required to be equality-preserving }; // 
[iterator.traits] @@ -64,92 +60,17 @@ using iter_reference_t = decltype(*std::declval<_Tp&>()); #endif // _LIBCPP_STD_VER >= 20 template -struct _LIBCPP_TEMPLATE_VIS iterator_traits; +struct iterator_traits; -struct _LIBCPP_TEMPLATE_VIS input_iterator_tag {}; -struct _LIBCPP_TEMPLATE_VIS output_iterator_tag {}; -struct _LIBCPP_TEMPLATE_VIS forward_iterator_tag : public input_iterator_tag {}; -struct _LIBCPP_TEMPLATE_VIS bidirectional_iterator_tag : public forward_iterator_tag {}; -struct _LIBCPP_TEMPLATE_VIS random_access_iterator_tag : public bidirectional_iterator_tag {}; +struct input_iterator_tag {}; +struct output_iterator_tag {}; +struct forward_iterator_tag : public input_iterator_tag {}; +struct bidirectional_iterator_tag : public forward_iterator_tag {}; +struct random_access_iterator_tag : public bidirectional_iterator_tag {}; #if _LIBCPP_STD_VER >= 20 -struct _LIBCPP_TEMPLATE_VIS contiguous_iterator_tag : public random_access_iterator_tag {}; +struct contiguous_iterator_tag : public random_access_iterator_tag {}; #endif -template -struct __iter_traits_cache { - using type = _If< __is_primary_template >::value, _Iter, iterator_traits<_Iter> >; -}; -template -using _ITER_TRAITS _LIBCPP_NODEBUG = typename __iter_traits_cache<_Iter>::type; - -struct __iter_concept_concept_test { - template - using _Apply _LIBCPP_NODEBUG = typename _ITER_TRAITS<_Iter>::iterator_concept; -}; -struct __iter_concept_category_test { - template - using _Apply _LIBCPP_NODEBUG = typename _ITER_TRAITS<_Iter>::iterator_category; -}; -struct __iter_concept_random_fallback { - template - using _Apply _LIBCPP_NODEBUG = - __enable_if_t<__is_primary_template >::value, random_access_iterator_tag>; -}; - -template -struct __test_iter_concept : _IsValidExpansion<_Tester::template _Apply, _Iter>, _Tester {}; - -template -struct __iter_concept_cache { - using type = _Or< __test_iter_concept<_Iter, __iter_concept_concept_test>, - __test_iter_concept<_Iter, __iter_concept_category_test>, - __test_iter_concept<_Iter, __iter_concept_random_fallback> >; -}; - -template -using _ITER_CONCEPT _LIBCPP_NODEBUG = typename __iter_concept_cache<_Iter>::type::template _Apply<_Iter>; - -template -struct __has_iterator_typedefs { -private: - template - static false_type __test(...); - template - static true_type - __test(__void_t* = nullptr, - __void_t* = nullptr, - __void_t* = nullptr, - __void_t* = nullptr, - __void_t* = nullptr); - -public: - static const bool value = decltype(__test<_Tp>(nullptr, nullptr, nullptr, nullptr, nullptr))::value; -}; - -template -struct __has_iterator_category { -private: - template - static false_type __test(...); - template - static true_type __test(typename _Up::iterator_category* = nullptr); - -public: - static const bool value = decltype(__test<_Tp>(nullptr))::value; -}; - -template -struct __has_iterator_concept { -private: - template - static false_type __test(...); - template - static true_type __test(typename _Up::iterator_concept* = nullptr); - -public: - static const bool value = decltype(__test<_Tp>(nullptr))::value; -}; - #if _LIBCPP_STD_VER >= 20 // The `cpp17-*-iterator` exposition-only concepts have very similar names to the `Cpp17*Iterator` named requirements @@ -158,9 +79,9 @@ public: namespace __iterator_traits_detail { template concept __cpp17_iterator = requires(_Ip __i) { - { *__i } -> __can_reference; + { *__i } -> __referenceable; { ++__i } -> same_as<_Ip&>; - { *__i++ } -> __can_reference; + { *__i++ } -> __referenceable; } && copyable<_Ip>; template @@ -219,16 +140,6 @@ concept 
__specifies_members = requires { requires __has_member_iterator_category<_Ip>; }; -template -struct __iterator_traits_member_pointer_or_void { - using type = void; -}; - -template <__has_member_pointer _Tp> -struct __iterator_traits_member_pointer_or_void<_Tp> { - using type = typename _Tp::pointer; -}; - template concept __cpp17_iterator_missing_members = !__specifies_members<_Tp> && __iterator_traits_detail::__cpp17_iterator<_Tp>; @@ -239,14 +150,14 @@ concept __cpp17_input_iterator_missing_members = // Otherwise, `pointer` names `void`. template struct __iterator_traits_member_pointer_or_arrow_or_void { - using type = void; + using type _LIBCPP_NODEBUG = void; }; // [iterator.traits]/3.2.1 // If the qualified-id `I::pointer` is valid and denotes a type, `pointer` names that type. template <__has_member_pointer _Ip> struct __iterator_traits_member_pointer_or_arrow_or_void<_Ip> { - using type = typename _Ip::pointer; + using type _LIBCPP_NODEBUG = typename _Ip::pointer; }; // Otherwise, if `decltype(declval().operator->())` is well-formed, then `pointer` names that @@ -254,48 +165,48 @@ struct __iterator_traits_member_pointer_or_arrow_or_void<_Ip> { template requires requires(_Ip& __i) { __i.operator->(); } && (!__has_member_pointer<_Ip>) struct __iterator_traits_member_pointer_or_arrow_or_void<_Ip> { - using type = decltype(std::declval<_Ip&>().operator->()); + using type _LIBCPP_NODEBUG = decltype(std::declval<_Ip&>().operator->()); }; // Otherwise, `reference` names `iter-reference-t`. template struct __iterator_traits_member_reference { - using type = iter_reference_t<_Ip>; + using type _LIBCPP_NODEBUG = iter_reference_t<_Ip>; }; // [iterator.traits]/3.2.2 // If the qualified-id `I::reference` is valid and denotes a type, `reference` names that type. template <__has_member_reference _Ip> struct __iterator_traits_member_reference<_Ip> { - using type = typename _Ip::reference; + using type _LIBCPP_NODEBUG = typename _Ip::reference; }; // [iterator.traits]/3.2.3.4 // input_iterator_tag template struct __deduce_iterator_category { - using type = input_iterator_tag; + using type _LIBCPP_NODEBUG = input_iterator_tag; }; // [iterator.traits]/3.2.3.1 // `random_access_iterator_tag` if `I` satisfies `cpp17-random-access-iterator`, or otherwise template <__iterator_traits_detail::__cpp17_random_access_iterator _Ip> struct __deduce_iterator_category<_Ip> { - using type = random_access_iterator_tag; + using type _LIBCPP_NODEBUG = random_access_iterator_tag; }; // [iterator.traits]/3.2.3.2 // `bidirectional_iterator_tag` if `I` satisfies `cpp17-bidirectional-iterator`, or otherwise template <__iterator_traits_detail::__cpp17_bidirectional_iterator _Ip> struct __deduce_iterator_category<_Ip> { - using type = bidirectional_iterator_tag; + using type _LIBCPP_NODEBUG = bidirectional_iterator_tag; }; // [iterator.traits]/3.2.3.3 // `forward_iterator_tag` if `I` satisfies `cpp17-forward-iterator`, or otherwise template <__iterator_traits_detail::__cpp17_forward_iterator _Ip> struct __deduce_iterator_category<_Ip> { - using type = forward_iterator_tag; + using type _LIBCPP_NODEBUG = forward_iterator_tag; }; template @@ -306,13 +217,13 @@ struct __iterator_traits_iterator_category : __deduce_iterator_category<_Ip> {}; // that type. template <__has_member_iterator_category _Ip> struct __iterator_traits_iterator_category<_Ip> { - using type = typename _Ip::iterator_category; + using type _LIBCPP_NODEBUG = typename _Ip::iterator_category; }; // otherwise, it names void. 
template struct __iterator_traits_difference_type { - using type = void; + using type _LIBCPP_NODEBUG = void; }; // If the qualified-id `incrementable_traits::difference_type` is valid and denotes a type, then @@ -320,7 +231,7 @@ struct __iterator_traits_difference_type { template requires requires { typename incrementable_traits<_Ip>::difference_type; } struct __iterator_traits_difference_type<_Ip> { - using type = typename incrementable_traits<_Ip>::difference_type; + using type _LIBCPP_NODEBUG = typename incrementable_traits<_Ip>::difference_type; }; // [iterator.traits]/3.4 @@ -328,6 +239,9 @@ struct __iterator_traits_difference_type<_Ip> { template struct __iterator_traits {}; +template +using __pointer_member _LIBCPP_NODEBUG = typename _Tp::pointer; + // [iterator.traits]/3.1 // If `I` has valid ([temp.deduct]) member types `difference-type`, `value-type`, `reference`, and // `iterator-category`, then `iterator-traits` has the following publicly accessible members: @@ -336,7 +250,7 @@ struct __iterator_traits<_Ip> { using iterator_category = typename _Ip::iterator_category; using value_type = typename _Ip::value_type; using difference_type = typename _Ip::difference_type; - using pointer = typename __iterator_traits_member_pointer_or_void<_Ip>::type; + using pointer = __detected_or_t; using reference = typename _Ip::reference; }; @@ -391,13 +305,30 @@ struct __iterator_traits<_Iter, true> is_convertible::value || is_convertible::value > {}; +template +struct __has_iterator_typedefs { +private: + template + static false_type __test(...); + template + static true_type + __test(__void_t* = nullptr, + __void_t* = nullptr, + __void_t* = nullptr, + __void_t* = nullptr, + __void_t* = nullptr); + +public: + static const bool value = decltype(__test<_Tp>(nullptr, nullptr, nullptr, nullptr, nullptr))::value; +}; + // iterator_traits will only have the nested types if Iterator::iterator_category // exists. Else iterator_traits will be an empty class. This is a // conforming extension which allows some programs to compile and behave as // the client expects instead of failing at compile time. 
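(Editorial illustration, not part of the upstream patch.) The hunks above replace the old `__iterator_traits_member_pointer_or_void` helper with `__detected_or_t<void, __pointer_member, _Ip>`. A minimal standalone sketch of that detection idiom follows; it uses invented, non-reserved names rather than libc++'s internal `__detected_or_t`, and is only meant to show what the new line computes before the `iterator_traits` definition continues below.

#include <type_traits>

// Pick T::pointer when that nested type exists, otherwise fall back to Default.
template <class Default, class T, class = void>
struct detected_pointer_or {
  using type = Default;
};

template <class Default, class T>
struct detected_pointer_or<Default, T, std::void_t<typename T::pointer>> {
  using type = typename T::pointer;
};

struct with_pointer { using pointer = int*; };
struct without_pointer {};

static_assert(std::is_same_v<detected_pointer_or<void, with_pointer>::type, int*>);
static_assert(std::is_same_v<detected_pointer_or<void, without_pointer>::type, void>);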
template -struct _LIBCPP_TEMPLATE_VIS iterator_traits : __iterator_traits<_Iter, __has_iterator_typedefs<_Iter>::value> { +struct iterator_traits : __iterator_traits<_Iter, __has_iterator_typedefs<_Iter>::value> { using __primary_template _LIBCPP_NODEBUG = iterator_traits; }; #endif // _LIBCPP_STD_VER >= 20 @@ -406,7 +337,7 @@ template #if _LIBCPP_STD_VER >= 20 requires is_object_v<_Tp> #endif -struct _LIBCPP_TEMPLATE_VIS iterator_traits<_Tp*> { +struct iterator_traits<_Tp*> { typedef ptrdiff_t difference_type; typedef __remove_cv_t<_Tp> value_type; typedef _Tp* pointer; @@ -417,18 +348,19 @@ struct _LIBCPP_TEMPLATE_VIS iterator_traits<_Tp*> { #endif }; -template >::value> -struct __has_iterator_category_convertible_to : is_convertible::iterator_category, _Up> { -}; +template +using __iterator_category _LIBCPP_NODEBUG = typename _Tp::iterator_category; + +template +using __iterator_concept _LIBCPP_NODEBUG = typename _Tp::iterator_concept; template -struct __has_iterator_category_convertible_to<_Tp, _Up, false> : false_type {}; - -template ::value> -struct __has_iterator_concept_convertible_to : is_convertible {}; +using __has_iterator_category_convertible_to _LIBCPP_NODEBUG = + is_convertible<__detected_or_t<__nat, __iterator_category, iterator_traits<_Tp> >, _Up>; template -struct __has_iterator_concept_convertible_to<_Tp, _Up, false> : false_type {}; +using __has_iterator_concept_convertible_to _LIBCPP_NODEBUG = + is_convertible<__detected_or_t<__nat, __iterator_concept, _Tp>, _Up>; template using __has_input_iterator_category _LIBCPP_NODEBUG = __has_iterator_category_convertible_to<_Tp, input_iterator_tag>; @@ -490,6 +422,18 @@ using __has_exactly_bidirectional_iterator_category _LIBCPP_NODEBUG = template using __iter_value_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type; +#if _LIBCPP_STD_VER >= 23 +template +using __iter_key_type _LIBCPP_NODEBUG = remove_const_t>>; + +template +using __iter_mapped_type _LIBCPP_NODEBUG = tuple_element_t<1, __iter_value_type<_InputIterator>>; + +template +using __iter_to_alloc_type _LIBCPP_NODEBUG = + pair>, + tuple_element_t<1, __iter_value_type<_InputIterator>>>; +#else template using __iter_key_type _LIBCPP_NODEBUG = __remove_const_t::value_type::first_type>; @@ -501,6 +445,7 @@ template using __iter_to_alloc_type _LIBCPP_NODEBUG = pair::value_type::first_type, typename iterator_traits<_InputIterator>::value_type::second_type>; +#endif // _LIBCPP_STD_VER >= 23 template using __iterator_category_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::iterator_category; diff --git a/lib/libcxx/include/__iterator/move_iterator.h b/lib/libcxx/include/__iterator/move_iterator.h index a1c53e9bd2..2b64680a34 100644 --- a/lib/libcxx/include/__iterator/move_iterator.h +++ b/lib/libcxx/include/__iterator/move_iterator.h @@ -64,7 +64,7 @@ concept __move_iter_comparable = requires { #endif // _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS move_iterator +class move_iterator #if _LIBCPP_STD_VER >= 20 : public __move_iter_category_base<_Iter> #endif diff --git a/lib/libcxx/include/__iterator/move_sentinel.h b/lib/libcxx/include/__iterator/move_sentinel.h index 4a2a09ef06..c77ca5f1a9 100644 --- a/lib/libcxx/include/__iterator/move_sentinel.h +++ b/lib/libcxx/include/__iterator/move_sentinel.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS move_sentinel { +class move_sentinel { public: _LIBCPP_HIDE_FROM_ABI move_sentinel() = default; diff --git 
a/lib/libcxx/include/__iterator/next.h b/lib/libcxx/include/__iterator/next.h index 1f68a5bec8..1143ab31ff 100644 --- a/lib/libcxx/include/__iterator/next.h +++ b/lib/libcxx/include/__iterator/next.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___ITERATOR_NEXT_H #define _LIBCPP___ITERATOR_NEXT_H -#include <__assert> #include <__config> #include <__iterator/advance.h> #include <__iterator/concepts.h> @@ -27,11 +26,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter next(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n = 1) { - // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. - // Note that this check duplicates the similar check in `std::advance`. - _LIBCPP_ASSERT_PEDANTIC(__n >= 0 || __has_bidirectional_iterator_category<_InputIter>::value, - "Attempt to next(it, n) with negative n on a non-bidirectional iterator"); - std::advance(__x, __n); return __x; } diff --git a/lib/libcxx/include/__iterator/ostream_iterator.h b/lib/libcxx/include/__iterator/ostream_iterator.h index 93ecc03010..2b459f4628 100644 --- a/lib/libcxx/include/__iterator/ostream_iterator.h +++ b/lib/libcxx/include/__iterator/ostream_iterator.h @@ -26,7 +26,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_SUPPRESS_DEPRECATED_PUSH template > -class _LIBCPP_TEMPLATE_VIS ostream_iterator +class ostream_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator #endif diff --git a/lib/libcxx/include/__iterator/ostreambuf_iterator.h b/lib/libcxx/include/__iterator/ostreambuf_iterator.h index f00449355e..7133331a7b 100644 --- a/lib/libcxx/include/__iterator/ostreambuf_iterator.h +++ b/lib/libcxx/include/__iterator/ostreambuf_iterator.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_SUPPRESS_DEPRECATED_PUSH template -class _LIBCPP_TEMPLATE_VIS ostreambuf_iterator +class ostreambuf_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator #endif diff --git a/lib/libcxx/include/__iterator/prev.h b/lib/libcxx/include/__iterator/prev.h index bffd5527dc..97139e067c 100644 --- a/lib/libcxx/include/__iterator/prev.h +++ b/lib/libcxx/include/__iterator/prev.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___ITERATOR_PREV_H #define _LIBCPP___ITERATOR_PREV_H -#include <__assert> #include <__config> #include <__iterator/advance.h> #include <__iterator/concepts.h> @@ -31,10 +30,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter prev(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n) { - // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. - // Note that this check duplicates the similar check in `std::advance`. 
- _LIBCPP_ASSERT_PEDANTIC(__n <= 0 || __has_bidirectional_iterator_category<_InputIter>::value, - "Attempt to prev(it, n) with a positive n on a non-bidirectional iterator"); std::advance(__x, -__n); return __x; } diff --git a/lib/libcxx/include/__iterator/product_iterator.h b/lib/libcxx/include/__iterator/product_iterator.h new file mode 100644 index 0000000000..1db673bafd --- /dev/null +++ b/lib/libcxx/include/__iterator/product_iterator.h @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_PRODUCT_ITERATOR_H +#define _LIBCPP___ITERATOR_PRODUCT_ITERATOR_H + +// Product iterators are iterators that contain two or more underlying iterators. +// +// For example, std::flat_map stores its data into two separate containers, and its iterator +// is a proxy over two separate underlying iterators. The concept of product iterators +// allows algorithms to operate over these underlying iterators separately, opening the +// door to various optimizations. +// +// If __product_iterator_traits can be instantiated, the following functions and associated types must be provided: +// - static constexpr size_t Traits::__size +// The number of underlying iterators inside the product iterator. +// +// - template +// static decltype(auto) Traits::__get_iterator_element(It&& __it) +// Returns the _Nth iterator element of the given product iterator. +// +// - template +// static _Iterator __make_product_iterator(_Iters&&...); +// Creates a product iterator from the given underlying iterators. 
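(Editorial illustration, not part of the upstream patch.) To make the product-iterator traits protocol described above concrete, here is a minimal standalone sketch for a hypothetical iterator that bundles two underlying iterators. `zip_iter`, `product_iterator_traits`, and every member name below are invented for the example and are not the libc++ spellings.

#include <cstddef>
#include <utility>

// Hypothetical "product" iterator holding two underlying iterators.
template <class It1, class It2>
struct zip_iter {
  It1 first;
  It2 second;
};

// Primary template intentionally left undefined, mirroring the protocol above.
template <class Iterator>
struct product_iterator_traits;

// A specialization providing the three pieces the protocol asks for:
// the element count, element access, and a factory that rebuilds the
// product iterator from its underlying iterators.
template <class It1, class It2>
struct product_iterator_traits<zip_iter<It1, It2>> {
  static constexpr std::size_t size = 2;

  template <std::size_t N, class Iter>
  static decltype(auto) get_iterator_element(Iter&& it) {
    if constexpr (N == 0)
      return (std::forward<Iter>(it).first);
    else
      return (std::forward<Iter>(it).second);
  }

  template <class... Iters>
  static zip_iter<It1, It2> make_product_iterator(Iters&&... iters) {
    return {std::forward<Iters>(iters)...};
  }
};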
+ +#include <__config> +#include <__cstddef/size_t.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/integral_constant.h> +#include <__utility/declval.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct __product_iterator_traits; +/* exposition-only: +{ + static constexpr size_t __size = ...; + + template + static decltype(auto) __get_iterator_element(_Iter&&); + + template + static _Iterator __make_product_iterator(_Iters&&...); +}; +*/ + +template +struct __is_product_iterator : false_type {}; + +template +struct __is_product_iterator<_Tp, sizeof(__product_iterator_traits<_Tp>) * 0> : true_type {}; + +template +struct __is_product_iterator_of_size : false_type {}; + +template +struct __is_product_iterator_of_size<_Tp, _Size, __enable_if_t<__product_iterator_traits<_Tp>::__size == _Size> > + : true_type {}; + +template +using __product_iterator_element_t _LIBCPP_NODEBUG = + decltype(__product_iterator_traits<_Iterator>::template __get_iterator_element<_Nth>(std::declval<_Iterator>())); + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ITERATOR_PRODUCT_ITERATOR_H diff --git a/lib/libcxx/include/__iterator/reverse_iterator.h b/lib/libcxx/include/__iterator/reverse_iterator.h index 5bd1f868d3..8935e5a8ff 100644 --- a/lib/libcxx/include/__iterator/reverse_iterator.h +++ b/lib/libcxx/include/__iterator/reverse_iterator.h @@ -48,7 +48,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_SUPPRESS_DEPRECATED_PUSH template -class _LIBCPP_TEMPLATE_VIS reverse_iterator +class reverse_iterator #if _LIBCPP_STD_VER <= 14 || !defined(_LIBCPP_ABI_NO_ITERATOR_BASES) : public iterator::iterator_category, typename iterator_traits<_Iter>::value_type, diff --git a/lib/libcxx/include/__iterator/segmented_iterator.h b/lib/libcxx/include/__iterator/segmented_iterator.h index 7a8e1addea..af27a7be41 100644 --- a/lib/libcxx/include/__iterator/segmented_iterator.h +++ b/lib/libcxx/include/__iterator/segmented_iterator.h @@ -42,6 +42,7 @@ #include <__config> #include <__cstddef/size_t.h> +#include <__iterator/iterator_traits.h> #include <__type_traits/integral_constant.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -74,6 +75,11 @@ struct __has_specialization<_Tp, sizeof(_Tp) * 0> : true_type {}; template using __is_segmented_iterator _LIBCPP_NODEBUG = __has_specialization<__segmented_iterator_traits<_Iterator> >; +template +struct __has_random_access_local_iterator + : __has_random_access_iterator_category< + typename __segmented_iterator_traits< _SegmentedIterator >::__local_iterator > {}; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___SEGMENTED_ITERATOR_H diff --git a/lib/libcxx/include/__iterator/wrap_iter.h b/lib/libcxx/include/__iterator/wrap_iter.h index 966c4675b7..2b5bc489dd 100644 --- a/lib/libcxx/include/__iterator/wrap_iter.h +++ b/lib/libcxx/include/__iterator/wrap_iter.h @@ -112,9 +112,9 @@ private: template friend class basic_string_view; template - friend class _LIBCPP_TEMPLATE_VIS vector; + friend class vector; template - friend class _LIBCPP_TEMPLATE_VIS span; + friend class span; template friend struct array; }; @@ -236,7 +236,7 @@ struct __libcpp_is_contiguous_iterator<__wrap_iter<_It> > : true_type {}; #endif template -struct _LIBCPP_TEMPLATE_VIS pointer_traits<__wrap_iter<_It> > { +struct pointer_traits<__wrap_iter<_It> > { typedef __wrap_iter<_It> pointer; typedef typename pointer_traits<_It>::element_type element_type; typedef typename pointer_traits<_It>::difference_type difference_type; 
diff --git a/lib/libcxx/include/__locale b/lib/libcxx/include/__locale index dfe79d5e50..757a53951f 100644 --- a/lib/libcxx/include/__locale +++ b/lib/libcxx/include/__locale @@ -11,36 +11,43 @@ #define _LIBCPP___LOCALE #include <__config> -#include <__locale_dir/locale_base_api.h> -#include <__memory/shared_count.h> -#include <__mutex/once_flag.h> -#include <__type_traits/make_unsigned.h> -#include <__utility/no_destroy.h> -#include <__utility/private_constructor_tag.h> -#include -#include -#include -#include -#include + +#if _LIBCPP_HAS_LOCALIZATION + +# include <__locale_dir/locale_base_api.h> +# include <__memory/addressof.h> +# include <__memory/shared_count.h> +# include <__mutex/once_flag.h> +# include <__type_traits/make_unsigned.h> +# include <__utility/no_destroy.h> +# include <__utility/private_constructor_tag.h> +# include +# include +# include +# include +# include // Some platforms require more includes than others. Keep the includes on all plaforms for now. -#include -#include +# include +# include -#if _LIBCPP_HAS_WIDE_CHARACTERS -# include -#else -# include <__std_mbstate_t.h> -#endif +# if _LIBCPP_HAS_WIDE_CHARACTERS +# include +# else +# include <__std_mbstate_t.h> +# endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_STD class _LIBCPP_EXPORTED_FROM_ABI locale; +template +class collate; + template _LIBCPP_HIDE_FROM_ABI bool has_facet(const locale&) _NOEXCEPT; @@ -49,8 +56,10 @@ _LIBCPP_HIDE_FROM_ABI const _Facet& use_facet(const locale&); class _LIBCPP_EXPORTED_FROM_ABI locale { public: - // locale is essentially a shared_ptr that doesn't support weak_ptrs and never got a move constructor. + // locale is essentially a shared_ptr that doesn't support weak_ptrs and never got a move constructor, + // so it is trivially relocatable. Like shared_ptr, it is also replaceable. using __trivially_relocatable _LIBCPP_NODEBUG = locale; + using __replaceable _LIBCPP_NODEBUG = locale; // types: class _LIBCPP_EXPORTED_FROM_ABI facet; @@ -80,17 +89,25 @@ public: const locale& operator=(const locale&) _NOEXCEPT; template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS locale combine(const locale&) const; + _LIBCPP_HIDE_FROM_ABI locale combine(const locale& __other) const { + if (!std::has_facet<_Facet>(__other)) + __throw_runtime_error("locale::combine: locale missing facet"); + + return locale(*this, std::addressof(const_cast<_Facet&>(std::use_facet<_Facet>(__other)))); + } // locale operations: string name() const; bool operator==(const locale&) const; -#if _LIBCPP_STD_VER <= 17 +# if _LIBCPP_STD_VER <= 17 _LIBCPP_HIDE_FROM_ABI bool operator!=(const locale& __y) const { return !(*this == __y); } -#endif +# endif template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS bool - operator()(const basic_string<_CharT, _Traits, _Allocator>&, const basic_string<_CharT, _Traits, _Allocator>&) const; + _LIBCPP_HIDE_FROM_ABI bool operator()(const basic_string<_CharT, _Traits, _Allocator>& __x, + const basic_string<_CharT, _Traits, _Allocator>& __y) const { + return std::use_facet >(*this).compare( + __x.data(), __x.data() + __x.size(), __y.data(), __y.data() + __y.size()) < 0; + } // global locale objects: static locale global(const locale&); @@ -151,14 +168,6 @@ inline _LIBCPP_HIDE_FROM_ABI locale::locale(const locale& __other, _Facet* __f) __install_ctor(__other, __f, __f ? 
__f->id.__get() : 0); } -template -locale locale::combine(const locale& __other) const { - if (!std::has_facet<_Facet>(__other)) - __throw_runtime_error("locale::combine: locale missing facet"); - - return locale(*this, &const_cast<_Facet&>(std::use_facet<_Facet>(__other))); -} - template inline _LIBCPP_HIDE_FROM_ABI bool has_facet(const locale& __l) _NOEXCEPT { return __l.has_facet(_Facet::id); @@ -172,7 +181,7 @@ inline _LIBCPP_HIDE_FROM_ABI const _Facet& use_facet(const locale& __l) { // template class collate; template -class _LIBCPP_TEMPLATE_VIS collate : public locale::facet { +class collate : public locale::facet { public: typedef _CharT char_type; typedef basic_string string_type; @@ -237,14 +246,14 @@ long collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) cons } extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#endif +# endif // template class collate_byname; template -class _LIBCPP_TEMPLATE_VIS collate_byname; +class collate_byname; template <> class _LIBCPP_EXPORTED_FROM_ABI collate_byname : public collate { @@ -264,7 +273,7 @@ protected: string_type do_transform(const char_type* __lo, const char_type* __hi) const override; }; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI collate_byname : public collate { __locale::__locale_t __l_; @@ -283,20 +292,13 @@ protected: const char_type* __lo1, const char_type* __hi1, const char_type* __lo2, const char_type* __hi2) const override; string_type do_transform(const char_type* __lo, const char_type* __hi) const override; }; -#endif - -template -bool locale::operator()(const basic_string<_CharT, _Traits, _Allocator>& __x, - const basic_string<_CharT, _Traits, _Allocator>& __y) const { - return std::use_facet >(*this).compare( - __x.data(), __x.data() + __x.size(), __y.data(), __y.data() + __y.size()) < 0; -} +# endif // template class ctype class _LIBCPP_EXPORTED_FROM_ABI ctype_base { public: -#if defined(_LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE) +# if defined(_LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE) typedef unsigned long mask; static const mask space = 1 << 0; static const mask print = 1 << 1; @@ -308,14 +310,14 @@ public: static const mask punct = 1 << 7; static const mask xdigit = 1 << 8; static const mask blank = 1 << 9; -# if defined(__BIONIC__) +# if defined(__BIONIC__) // Historically this was a part of regex_traits rather than ctype_base. The // historical value of the constant is preserved for ABI compatibility. 
static const mask __regex_word = 0x8000; -# else +# else static const mask __regex_word = 1 << 10; -# endif // defined(__BIONIC__) -#elif defined(__GLIBC__) +# endif // defined(__BIONIC__) +# elif defined(__GLIBC__) typedef unsigned short mask; static const mask space = _ISspace; static const mask print = _ISprint; @@ -327,12 +329,12 @@ public: static const mask punct = _ISpunct; static const mask xdigit = _ISxdigit; static const mask blank = _ISblank; -# if defined(__mips__) || (BYTE_ORDER == BIG_ENDIAN) +# if defined(__mips__) || (BYTE_ORDER == BIG_ENDIAN) static const mask __regex_word = static_cast(_ISbit(15)); -# else +# else static const mask __regex_word = 0x80; -# endif -#elif defined(_LIBCPP_MSVCRT_LIKE) +# endif +# elif defined(_LIBCPP_MSVCRT_LIKE) typedef unsigned short mask; static const mask space = _SPACE; static const mask print = _BLANK | _PUNCT | _ALPHA | _DIGIT; @@ -345,16 +347,16 @@ public: static const mask xdigit = _HEX; static const mask blank = _BLANK; static const mask __regex_word = 0x4000; // 0x8000 and 0x0100 and 0x00ff are used -# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT -# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA -#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) -# ifdef __APPLE__ +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA +# elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) +# ifdef __APPLE__ typedef uint32_t mask; -# elif defined(__FreeBSD__) +# elif defined(__FreeBSD__) typedef unsigned long mask; -# elif defined(__NetBSD__) +# elif defined(__NetBSD__) typedef unsigned short mask; -# endif +# endif static const mask space = _CTYPE_S; static const mask print = _CTYPE_R; static const mask cntrl = _CTYPE_C; @@ -365,16 +367,16 @@ public: static const mask punct = _CTYPE_P; static const mask xdigit = _CTYPE_X; -# if defined(__NetBSD__) +# if defined(__NetBSD__) static const mask blank = _CTYPE_BL; // NetBSD defines classes up to 0x2000 // see sys/ctype_bits.h, _CTYPE_Q static const mask __regex_word = 0x8000; -# else +# else static const mask blank = _CTYPE_B; static const mask __regex_word = 0x80; -# endif -#elif defined(_AIX) +# endif +# elif defined(_AIX) typedef unsigned int mask; static const mask space = _ISSPACE; static const mask print = _ISPRINT; @@ -387,7 +389,7 @@ public: static const mask xdigit = _ISXDIGIT; static const mask blank = _ISBLANK; static const mask __regex_word = 0x8000; -#elif defined(_NEWLIB_VERSION) +# elif defined(_NEWLIB_VERSION) // Same type as Newlib's _ctype_ array in newlib/libc/include/ctype.h. typedef char mask; // In case char is signed, static_cast is needed to avoid warning on @@ -404,11 +406,11 @@ public: static const mask blank = static_cast(_B); // mask is already fully saturated, use a different type in regex_type_traits. 
static const unsigned short __regex_word = 0x100; -# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT -# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA -# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT -#elif defined(__MVS__) -# if defined(__NATIVE_ASCII_F) +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT +# elif defined(__MVS__) +# if defined(__NATIVE_ASCII_F) typedef unsigned int mask; static const mask space = _ISSPACE_A; static const mask print = _ISPRINT_A; @@ -420,7 +422,7 @@ public: static const mask punct = _ISPUNCT_A; static const mask xdigit = _ISXDIGIT_A; static const mask blank = _ISBLANK_A; -# else +# else typedef unsigned short mask; static const mask space = __ISSPACE; static const mask print = __ISPRINT; @@ -432,11 +434,11 @@ public: static const mask punct = __ISPUNCT; static const mask xdigit = __ISXDIGIT; static const mask blank = __ISBLANK; -# endif +# endif static const mask __regex_word = 0x8000; -#else -# error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE? -#endif +# else +# error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE? +# endif static const mask alnum = alpha | digit; static const mask graph = alnum | punct; @@ -448,9 +450,9 @@ public: }; template -class _LIBCPP_TEMPLATE_VIS ctype; +class ctype; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI ctype : public locale::facet, public ctype_base { public: @@ -515,7 +517,7 @@ protected: virtual const char_type* do_narrow(const char_type* __low, const char_type* __high, char __dfault, char* __dest) const; }; -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_isascii(int __c) { return (__c & ~0x7F) == 0; } @@ -580,25 +582,13 @@ public: static locale::id id; -#ifdef _CACHED_RUNES +# ifdef _CACHED_RUNES static const size_t table_size = _CACHED_RUNES; -#else +# else static const size_t table_size = 256; // FIXME: Don't hardcode this. 
-#endif +# endif _LIBCPP_HIDE_FROM_ABI const mask* table() const _NOEXCEPT { return __tab_; } static const mask* classic_table() _NOEXCEPT; -#if defined(__GLIBC__) || defined(__EMSCRIPTEN__) - static const int* __classic_upper_table() _NOEXCEPT; - static const int* __classic_lower_table() _NOEXCEPT; -#endif -#if defined(__NetBSD__) - static const short* __classic_upper_table() _NOEXCEPT; - static const short* __classic_lower_table() _NOEXCEPT; -#endif -#if defined(__MVS__) - static const unsigned short* __classic_upper_table() _NOEXCEPT; - static const unsigned short* __classic_lower_table() _NOEXCEPT; -#endif protected: ~ctype() override; @@ -615,7 +605,7 @@ protected: // template class ctype_byname; template -class _LIBCPP_TEMPLATE_VIS ctype_byname; +class ctype_byname; template <> class _LIBCPP_EXPORTED_FROM_ABI ctype_byname : public ctype { @@ -633,7 +623,7 @@ protected: const char_type* do_tolower(char_type* __low, const char_type* __high) const override; }; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI ctype_byname : public ctype { __locale::__locale_t __l_; @@ -658,7 +648,7 @@ protected: const char_type* do_narrow(const char_type* __low, const char_type* __high, char __dfault, char* __dest) const override; }; -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS template inline _LIBCPP_HIDE_FROM_ABI bool isspace(_CharT __c, const locale& __loc) { @@ -741,7 +731,7 @@ public: // template class codecvt; template -class _LIBCPP_TEMPLATE_VIS codecvt; +class codecvt; // template <> class codecvt @@ -824,7 +814,7 @@ protected: // template <> class codecvt -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI codecvt : public locale::facet, public codecvt_base { __locale::__locale_t __l_; @@ -903,7 +893,7 @@ protected: virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, size_t __mx) const; virtual int do_max_length() const _NOEXCEPT; }; -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS // template <> class codecvt // deprecated in C++20 @@ -985,7 +975,7 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -#if _LIBCPP_HAS_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T // template <> class codecvt // C++20 @@ -1066,7 +1056,7 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -#endif +# endif // template <> class codecvt // deprecated in C++20 @@ -1148,7 +1138,7 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -#if _LIBCPP_HAS_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T // template <> class codecvt // C++20 @@ -1229,12 +1219,12 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -#endif +# endif // template class codecvt_byname template -class _LIBCPP_TEMPLATE_VIS codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> { +class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> { public: _LIBCPP_HIDE_FROM_ABI explicit codecvt_byname(const char* __nm, size_t __refs = 0) : codecvt<_InternT, _ExternT, _StateT>(__nm, __refs) {} @@ -1251,17 +1241,17 @@ codecvt_byname<_InternT, _ExternT, _StateT>::~codecvt_byname() {} _LIBCPP_SUPPRESS_DEPRECATED_POP extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; -#endif +# endif extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS 
codecvt_byname; // deprecated in C++20 extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // deprecated in C++20 -#if _LIBCPP_HAS_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // C++20 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // C++20 -#endif +# endif template struct __narrow_to_utf8 { @@ -1298,7 +1288,7 @@ struct _LIBCPP_EXPORTED_FROM_ABI __narrow_to_utf8<16> : public codecvt : public codecvt : public codecvt __sz ? __nb + __sz : __ne, __nn, __buf, __buf + __sz, __bn); if (__r == codecvt_base::error || __nn == __nb) - __throw_runtime_error("locale not supported"); + std::__throw_runtime_error("locale not supported"); for (const char16_t* __p = __buf; __p < __bn; ++__p, ++__s) *__s = *__p; __nb = __nn; @@ -1398,7 +1388,7 @@ struct _LIBCPP_EXPORTED_FROM_ABI __widen_from_utf8<32> : public codecvt __sz ? __nb + __sz : __ne, __nn, __buf, __buf + __sz, __bn); if (__r == codecvt_base::error || __nn == __nb) - __throw_runtime_error("locale not supported"); + std::__throw_runtime_error("locale not supported"); for (const char32_t* __p = __buf; __p < __bn; ++__p, ++__s) *__s = *__p; __nb = __nn; @@ -1410,7 +1400,7 @@ struct _LIBCPP_EXPORTED_FROM_ABI __widen_from_utf8<32> : public codecvt class numpunct template -class _LIBCPP_TEMPLATE_VIS numpunct; +class numpunct; template <> class _LIBCPP_EXPORTED_FROM_ABI numpunct : public locale::facet { @@ -1441,7 +1431,7 @@ protected: string __grouping_; }; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI numpunct : public locale::facet { public: @@ -1470,12 +1460,12 @@ protected: char_type __thousands_sep_; string __grouping_; }; -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS // template class numpunct_byname template -class _LIBCPP_TEMPLATE_VIS numpunct_byname; +class numpunct_byname; template <> class _LIBCPP_EXPORTED_FROM_ABI numpunct_byname : public numpunct { @@ -1493,7 +1483,7 @@ private: void __init(const char*); }; -#if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI numpunct_byname : public numpunct { public: @@ -1509,8 +1499,10 @@ protected: private: void __init(const char*); }; -#endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_HAS_LOCALIZATION + #endif // _LIBCPP___LOCALE diff --git a/lib/libcxx/include/__locale_dir/check_grouping.h b/lib/libcxx/include/__locale_dir/check_grouping.h new file mode 100644 index 0000000000..93e9e404bb --- /dev/null +++ b/lib/libcxx/include/__locale_dir/check_grouping.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_CHECK_GROUPING_H +#define _LIBCPP___LOCALE_DIR_CHECK_GROUPING_H + +#include <__config> +#include <__fwd/string.h> +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +_LIBCPP_EXPORTED_FROM_ABI void +__check_grouping(const string& __grouping, unsigned* __g, unsigned* __g_end, ios_base::iostate& __err); + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_CHECK_GROUPING_H diff --git a/lib/libcxx/include/__locale_dir/get_c_locale.h b/lib/libcxx/include/__locale_dir/get_c_locale.h new file mode 100644 index 0000000000..e8bac9a870 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/get_c_locale.h @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_GET_C_LOCALE_H +#define _LIBCPP___LOCALE_DIR_GET_C_LOCALE_H + +#include <__config> +#include <__locale_dir/locale_base_api.h> + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// FIXME: This should really be part of the locale base API + +# if defined(__APPLE__) || defined(__FreeBSD__) +# define _LIBCPP_GET_C_LOCALE 0 +# elif defined(__NetBSD__) +# define _LIBCPP_GET_C_LOCALE LC_C_LOCALE +# else +# define _LIBCPP_GET_C_LOCALE __cloc() +// Get the C locale object +_LIBCPP_EXPORTED_FROM_ABI __locale::__locale_t __cloc(); +# define __cloc_defined +# endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_GET_C_LOCALE_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api.h b/lib/libcxx/include/__locale_dir/locale_base_api.h index 538b194ec3..5e6c69e95e 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api.h @@ -64,8 +64,6 @@ // Character manipulation functions // -------------------------------- // namespace __locale { -// int __islower(int, __locale_t); -// int __isupper(int, __locale_t); // int __isdigit(int, __locale_t); // required by the headers // int __isxdigit(int, __locale_t); // required by the headers // int __toupper(int, __locale_t); @@ -111,59 +109,64 @@ // int __sscanf(const char*, __locale_t, const char*, ...); // required by the headers // } -#if defined(__APPLE__) -# include <__locale_dir/support/apple.h> -#elif defined(__FreeBSD__) -# include <__locale_dir/support/freebsd.h> -#elif defined(__NetBSD__) -# include <__locale_dir/support/netbsd.h> -#elif defined(_LIBCPP_MSVCRT_LIKE) -# include <__locale_dir/support/windows.h> -#elif defined(__Fuchsia__) -# include <__locale_dir/support/fuchsia.h> -#else +#if _LIBCPP_HAS_LOCALIZATION + +# if defined(__APPLE__) +# include <__locale_dir/support/apple.h> +# elif defined(__FreeBSD__) +# include <__locale_dir/support/freebsd.h> +/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */ +# elif defined(__NetBSD__) +# include <__locale_dir/support/netbsd.h> +# elif 
defined(_LIBCPP_MSVCRT_LIKE) +# include <__locale_dir/support/windows.h> +# elif defined(__Fuchsia__) +# include <__locale_dir/support/fuchsia.h> +# elif defined(__linux__) +# include <__locale_dir/support/linux.h> +# else // TODO: This is a temporary definition to bridge between the old way we defined the locale base API // (by providing global non-reserved names) and the new API. As we move individual platforms // towards the new way of defining the locale base API, this should disappear since each platform // will define those directly. -# if defined(_AIX) || defined(__MVS__) -# include <__locale_dir/locale_base_api/ibm.h> -# elif defined(__ANDROID__) -# include <__locale_dir/locale_base_api/android.h> -# elif defined(__OpenBSD__) -# include <__locale_dir/locale_base_api/openbsd.h> -# elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC -# include <__locale_dir/locale_base_api/musl.h> -# endif +# if defined(_AIX) || defined(__MVS__) +# include <__locale_dir/locale_base_api/ibm.h> +# elif defined(__ANDROID__) +# include <__locale_dir/locale_base_api/android.h> +# elif defined(__OpenBSD__) +# include <__locale_dir/locale_base_api/openbsd.h> +# elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC +# include <__locale_dir/locale_base_api/musl.h> +# endif -# include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h> +# include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h> -# include <__cstddef/size_t.h> -# include <__utility/forward.h> -# include -# include -# include -# if _LIBCPP_HAS_WIDE_CHARACTERS -# include -# endif +# include <__cstddef/size_t.h> +# include <__utility/forward.h> +# include +# include +# include +# if _LIBCPP_HAS_WIDE_CHARACTERS +# include +# endif _LIBCPP_BEGIN_NAMESPACE_STD namespace __locale { // // Locale management // -# define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK -# define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK -# define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK -# define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK -# define _LIBCPP_TIME_MASK LC_TIME_MASK -# define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK -# define _LIBCPP_ALL_MASK LC_ALL_MASK -# define _LIBCPP_LC_ALL LC_ALL +# define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK +# define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK +# define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK +# define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK +# define _LIBCPP_TIME_MASK LC_TIME_MASK +# define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK +# define _LIBCPP_ALL_MASK LC_ALL_MASK +# define _LIBCPP_LC_ALL LC_ALL using __locale_t _LIBCPP_NODEBUG = locale_t; -# if defined(_LIBCPP_BUILDING_LIBRARY) +# if defined(_LIBCPP_BUILDING_LIBRARY) using __lconv_t _LIBCPP_NODEBUG = lconv; inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __name, __locale_t __loc) { @@ -177,7 +180,7 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __loc inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { freelocale(__loc); } inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { return __libcpp_localeconv_l(__loc); } -# endif // _LIBCPP_BUILDING_LIBRARY +# endif // _LIBCPP_BUILDING_LIBRARY // // Strtonum functions @@ -206,15 +209,10 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { // // Character manipulation functions // -# if defined(_LIBCPP_BUILDING_LIBRARY) -inline _LIBCPP_HIDE_FROM_ABI int __islower(int __ch, __locale_t __loc) { return islower_l(__ch, __loc); } -inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __ch, __locale_t __loc) { return isupper_l(__ch, __loc); } -# endif 
- inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __ch, __locale_t __loc) { return isdigit_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __ch, __locale_t __loc) { return isxdigit_l(__ch, __loc); } -# if defined(_LIBCPP_BUILDING_LIBRARY) +# if defined(_LIBCPP_BUILDING_LIBRARY) inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) { return strcoll_l(__s1, __s2, __loc); } @@ -224,7 +222,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, s inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __ch, __locale_t __loc) { return toupper_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __ch, __locale_t __loc) { return tolower_l(__ch, __loc); } -# if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __s1, const wchar_t* __s2, __locale_t __loc) { return wcscoll_l(__s1, __s2, __loc); } @@ -246,7 +244,7 @@ inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __ch, __locale_t __loc) { ret inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __ch, __locale_t __loc) { return iswxdigit_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __ch, __locale_t __loc) { return towupper_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __ch, __locale_t __loc) { return towlower_l(__ch, __loc); } -# endif +# endif inline _LIBCPP_HIDE_FROM_ABI size_t __strftime(char* __s, size_t __max, const char* __format, const tm* __tm, __locale_t __loc) { @@ -259,7 +257,7 @@ __strftime(char* __s, size_t __max, const char* __format, const tm* __tm, __loca inline _LIBCPP_HIDE_FROM_ABI decltype(__libcpp_mb_cur_max_l(__locale_t())) __mb_len_max(__locale_t __loc) { return __libcpp_mb_cur_max_l(__loc); } -# if _LIBCPP_HAS_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __ch, __locale_t __loc) { return __libcpp_btowc_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __ch, __locale_t __loc) { return __libcpp_wctob_l(__ch, __loc); } inline _LIBCPP_HIDE_FROM_ABI size_t @@ -287,17 +285,17 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { return __libcpp_mbsrtowcs_l(__dest, __src, __len, __ps, __loc); } -# endif // _LIBCPP_HAS_WIDE_CHARACTERS -# endif // _LIBCPP_BUILDING_LIBRARY +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_BUILDING_LIBRARY _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates -# ifdef _LIBCPP_COMPILER_CLANG_BASED -# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) -# else -# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */ -# endif +# ifdef _LIBCPP_COMPILER_CLANG_BASED +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) +# else +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) 
/* nothing */ +# endif template _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf( @@ -315,11 +313,13 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __s return std::__libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...); } _LIBCPP_DIAGNOSTIC_POP -# undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT +# undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT } // namespace __locale _LIBCPP_END_NAMESPACE_STD -#endif // Compatibility definition of locale base APIs +# endif // Compatibility definition of locale base APIs + +#endif // _LIBCPP_HAS_LOCALIZATION #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_H diff --git a/lib/libcxx/include/__locale_dir/messages.h b/lib/libcxx/include/__locale_dir/messages.h new file mode 100644 index 0000000000..c04bf04025 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/messages.h @@ -0,0 +1,143 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_MESSAGES_H +#define _LIBCPP___LOCALE_DIR_MESSAGES_H + +#include <__config> +#include <__iterator/back_insert_iterator.h> +#include <__locale> +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +# if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) +// Most unix variants have catopen. These are the specific ones that don't. +# if !defined(__BIONIC__) && !defined(_NEWLIB_VERSION) && !defined(__EMSCRIPTEN__) +# define _LIBCPP_HAS_CATOPEN 1 +# include +# else +# define _LIBCPP_HAS_CATOPEN 0 +# endif +# else +# define _LIBCPP_HAS_CATOPEN 0 +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +class _LIBCPP_EXPORTED_FROM_ABI messages_base { +public: + typedef intptr_t catalog; + + _LIBCPP_HIDE_FROM_ABI messages_base() {} +}; + +template +class messages : public locale::facet, public messages_base { +public: + typedef _CharT char_type; + typedef basic_string<_CharT> string_type; + + _LIBCPP_HIDE_FROM_ABI explicit messages(size_t __refs = 0) : locale::facet(__refs) {} + + _LIBCPP_HIDE_FROM_ABI catalog open(const basic_string& __nm, const locale& __loc) const { + return do_open(__nm, __loc); + } + + _LIBCPP_HIDE_FROM_ABI string_type get(catalog __c, int __set, int __msgid, const string_type& __dflt) const { + return do_get(__c, __set, __msgid, __dflt); + } + + _LIBCPP_HIDE_FROM_ABI void close(catalog __c) const { do_close(__c); } + + static locale::id id; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~messages() override {} + + virtual catalog do_open(const basic_string&, const locale&) const; + virtual string_type do_get(catalog, int __set, int __msgid, const string_type& __dflt) const; + virtual void do_close(catalog) const; +}; + +template +locale::id messages<_CharT>::id; + +template +typename messages<_CharT>::catalog messages<_CharT>::do_open(const basic_string& __nm, const locale&) const { +# if _LIBCPP_HAS_CATOPEN + return (catalog)catopen(__nm.c_str(), NL_CAT_LOCALE); +# else // !_LIBCPP_HAS_CATOPEN + (void)__nm; + return -1; +# endif // _LIBCPP_HAS_CATOPEN +} + +template +typename messages<_CharT>::string_type +messages<_CharT>::do_get(catalog __c, int __set, int __msgid, const string_type& __dflt) const { +# if 
_LIBCPP_HAS_CATOPEN + string __ndflt; + __narrow_to_utf8()( + std::back_inserter(__ndflt), __dflt.c_str(), __dflt.c_str() + __dflt.size()); + nl_catd __cat = (nl_catd)__c; + static_assert(sizeof(catalog) >= sizeof(nl_catd), "Unexpected nl_catd type"); + char* __n = catgets(__cat, __set, __msgid, __ndflt.c_str()); + string_type __w; + __widen_from_utf8()(std::back_inserter(__w), __n, __n + std::strlen(__n)); + return __w; +# else // !_LIBCPP_HAS_CATOPEN + (void)__c; + (void)__set; + (void)__msgid; + return __dflt; +# endif // _LIBCPP_HAS_CATOPEN +} + +template +void messages<_CharT>::do_close(catalog __c) const { +# if _LIBCPP_HAS_CATOPEN + catclose((nl_catd)__c); +# else // !_LIBCPP_HAS_CATOPEN + (void)__c; +# endif // _LIBCPP_HAS_CATOPEN +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages; +# endif + +template +class messages_byname : public messages<_CharT> { +public: + typedef messages_base::catalog catalog; + typedef basic_string<_CharT> string_type; + + _LIBCPP_HIDE_FROM_ABI explicit messages_byname(const char*, size_t __refs = 0) : messages<_CharT>(__refs) {} + + _LIBCPP_HIDE_FROM_ABI explicit messages_byname(const string&, size_t __refs = 0) : messages<_CharT>(__refs) {} + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~messages_byname() override {} +}; + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname; +# endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_MESSAGES_H diff --git a/lib/libcxx/include/__locale_dir/money.h b/lib/libcxx/include/__locale_dir/money.h new file mode 100644 index 0000000000..c129666550 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/money.h @@ -0,0 +1,873 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_MONEY_H +#define _LIBCPP___LOCALE_DIR_MONEY_H + +#include <__algorithm/copy.h> +#include <__algorithm/equal.h> +#include <__algorithm/find.h> +#include <__algorithm/reverse.h> +#include <__config> +#include <__locale> +#include <__locale_dir/check_grouping.h> +#include <__locale_dir/get_c_locale.h> +#include <__locale_dir/pad_and_output.h> +#include <__memory/unique_ptr.h> +#include +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +// money_base + +class _LIBCPP_EXPORTED_FROM_ABI money_base { +public: + enum part { none, space, symbol, sign, value }; + struct pattern { + char field[4]; + }; + + _LIBCPP_HIDE_FROM_ABI money_base() {} +}; + +// moneypunct + +template +class moneypunct : public locale::facet, public money_base { +public: + typedef _CharT char_type; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI explicit moneypunct(size_t __refs = 0) : locale::facet(__refs) {} + + _LIBCPP_HIDE_FROM_ABI char_type decimal_point() const { return do_decimal_point(); } + _LIBCPP_HIDE_FROM_ABI char_type thousands_sep() const { return do_thousands_sep(); } + _LIBCPP_HIDE_FROM_ABI string grouping() const { return do_grouping(); } + _LIBCPP_HIDE_FROM_ABI string_type curr_symbol() const { return do_curr_symbol(); } + _LIBCPP_HIDE_FROM_ABI string_type positive_sign() const { return do_positive_sign(); } + _LIBCPP_HIDE_FROM_ABI string_type negative_sign() const { return do_negative_sign(); } + _LIBCPP_HIDE_FROM_ABI int frac_digits() const { return do_frac_digits(); } + _LIBCPP_HIDE_FROM_ABI pattern pos_format() const { return do_pos_format(); } + _LIBCPP_HIDE_FROM_ABI pattern neg_format() const { return do_neg_format(); } + + static locale::id id; + static const bool intl = _International; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~moneypunct() override {} + + virtual char_type do_decimal_point() const { return numeric_limits::max(); } + virtual char_type do_thousands_sep() const { return numeric_limits::max(); } + virtual string do_grouping() const { return string(); } + virtual string_type do_curr_symbol() const { return string_type(); } + virtual string_type do_positive_sign() const { return string_type(); } + virtual string_type do_negative_sign() const { return string_type(1, '-'); } + virtual int do_frac_digits() const { return 0; } + virtual pattern do_pos_format() const { + pattern __p = {{symbol, sign, none, value}}; + return __p; + } + virtual pattern do_neg_format() const { + pattern __p = {{symbol, sign, none, value}}; + return __p; + } +}; + +template +locale::id moneypunct<_CharT, _International>::id; + +template +const bool moneypunct<_CharT, _International>::intl; + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct; +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct; +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct; +# endif + +// moneypunct_byname + +template +class moneypunct_byname : public moneypunct<_CharT, _International> { +public: + typedef money_base::pattern pattern; + typedef _CharT char_type; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI explicit 
moneypunct_byname(const char* __nm, size_t __refs = 0) + : moneypunct<_CharT, _International>(__refs) { + init(__nm); + } + + _LIBCPP_HIDE_FROM_ABI explicit moneypunct_byname(const string& __nm, size_t __refs = 0) + : moneypunct<_CharT, _International>(__refs) { + init(__nm.c_str()); + } + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~moneypunct_byname() override {} + + char_type do_decimal_point() const override { return __decimal_point_; } + char_type do_thousands_sep() const override { return __thousands_sep_; } + string do_grouping() const override { return __grouping_; } + string_type do_curr_symbol() const override { return __curr_symbol_; } + string_type do_positive_sign() const override { return __positive_sign_; } + string_type do_negative_sign() const override { return __negative_sign_; } + int do_frac_digits() const override { return __frac_digits_; } + pattern do_pos_format() const override { return __pos_format_; } + pattern do_neg_format() const override { return __neg_format_; } + +private: + char_type __decimal_point_; + char_type __thousands_sep_; + string __grouping_; + string_type __curr_symbol_; + string_type __positive_sign_; + string_type __negative_sign_; + int __frac_digits_; + pattern __pos_format_; + pattern __neg_format_; + + void init(const char*); +}; + +template <> +_LIBCPP_EXPORTED_FROM_ABI void moneypunct_byname::init(const char*); +template <> +_LIBCPP_EXPORTED_FROM_ABI void moneypunct_byname::init(const char*); +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname; +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname; + +# if _LIBCPP_HAS_WIDE_CHARACTERS +template <> +_LIBCPP_EXPORTED_FROM_ABI void moneypunct_byname::init(const char*); +template <> +_LIBCPP_EXPORTED_FROM_ABI void moneypunct_byname::init(const char*); +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname; +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname; +# endif + +// money_get + +template +class __money_get { +protected: + typedef _CharT char_type; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI __money_get() {} + + static void __gather_info( + bool __intl, + const locale& __loc, + money_base::pattern& __pat, + char_type& __dp, + char_type& __ts, + string& __grp, + string_type& __sym, + string_type& __psn, + string_type& __nsn, + int& __fd); +}; + +template +void __money_get<_CharT>::__gather_info( + bool __intl, + const locale& __loc, + money_base::pattern& __pat, + char_type& __dp, + char_type& __ts, + string& __grp, + string_type& __sym, + string_type& __psn, + string_type& __nsn, + int& __fd) { + if (__intl) { + const moneypunct& __mp = std::use_facet >(__loc); + __pat = __mp.neg_format(); + __nsn = __mp.negative_sign(); + __psn = __mp.positive_sign(); + __dp = __mp.decimal_point(); + __ts = __mp.thousands_sep(); + __grp = __mp.grouping(); + __sym = __mp.curr_symbol(); + __fd = __mp.frac_digits(); + } else { + const moneypunct& __mp = std::use_facet >(__loc); + __pat = __mp.neg_format(); + __nsn = __mp.negative_sign(); + __psn = __mp.positive_sign(); + __dp = __mp.decimal_point(); + __ts = __mp.thousands_sep(); + __grp = __mp.grouping(); + __sym = __mp.curr_symbol(); + __fd = __mp.frac_digits(); + } +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get; +# endif + +template > +class money_get : public locale::facet, private __money_get<_CharT> { +public: + typedef 
_CharT char_type; + typedef _InputIterator iter_type; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI explicit money_get(size_t __refs = 0) : locale::facet(__refs) {} + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, long double& __v) const { + return do_get(__b, __e, __intl, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, string_type& __v) const { + return do_get(__b, __e, __intl, __iob, __err, __v); + } + + static locale::id id; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~money_get() override {} + + virtual iter_type + do_get(iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, long double& __v) const; + virtual iter_type + do_get(iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, string_type& __v) const; + +private: + static bool __do_get( + iter_type& __b, + iter_type __e, + bool __intl, + const locale& __loc, + ios_base::fmtflags __flags, + ios_base::iostate& __err, + bool& __neg, + const ctype& __ct, + unique_ptr& __wb, + char_type*& __wn, + char_type* __we); +}; + +template +locale::id money_get<_CharT, _InputIterator>::id; + +_LIBCPP_EXPORTED_FROM_ABI void __do_nothing(void*); + +template +_LIBCPP_HIDE_FROM_ABI void __double_or_nothing(unique_ptr<_Tp, void (*)(void*)>& __b, _Tp*& __n, _Tp*& __e) { + bool __owns = __b.get_deleter() != __do_nothing; + size_t __cur_cap = static_cast(__e - __b.get()) * sizeof(_Tp); + size_t __new_cap = __cur_cap < numeric_limits::max() / 2 ? 2 * __cur_cap : numeric_limits::max(); + if (__new_cap == 0) + __new_cap = sizeof(_Tp); + size_t __n_off = static_cast(__n - __b.get()); + _Tp* __t = (_Tp*)std::realloc(__owns ? __b.get() : 0, __new_cap); + if (__t == 0) + std::__throw_bad_alloc(); + if (__owns) + __b.release(); + else + std::memcpy(__t, __b.get(), __cur_cap); + __b = unique_ptr<_Tp, void (*)(void*)>(__t, free); + __new_cap /= sizeof(_Tp); + __n = __b.get() + __n_off; + __e = __b.get() + __new_cap; +} + +// true == success +template +bool money_get<_CharT, _InputIterator>::__do_get( + iter_type& __b, + iter_type __e, + bool __intl, + const locale& __loc, + ios_base::fmtflags __flags, + ios_base::iostate& __err, + bool& __neg, + const ctype& __ct, + unique_ptr& __wb, + char_type*& __wn, + char_type* __we) { + if (__b == __e) { + __err |= ios_base::failbit; + return false; + } + const unsigned __bz = 100; + unsigned __gbuf[__bz]; + unique_ptr __gb(__gbuf, __do_nothing); + unsigned* __gn = __gb.get(); + unsigned* __ge = __gn + __bz; + money_base::pattern __pat; + char_type __dp; + char_type __ts; + string __grp; + string_type __sym; + string_type __psn; + string_type __nsn; + // Capture the spaces read into money_base::{space,none} so they + // can be compared to initial spaces in __sym. 
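+  // Only the most recently read run of whitespace matters: the money_base::symbol
+  // case below compares the tail of __spaces against any leading whitespace of the
+  // currency symbol when the field just before the symbol was space or none.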
+ string_type __spaces; + int __fd; + __money_get<_CharT>::__gather_info(__intl, __loc, __pat, __dp, __ts, __grp, __sym, __psn, __nsn, __fd); + const string_type* __trailing_sign = 0; + __wn = __wb.get(); + for (unsigned __p = 0; __p < 4 && __b != __e; ++__p) { + switch (__pat.field[__p]) { + case money_base::space: + if (__p != 3) { + if (__ct.is(ctype_base::space, *__b)) + __spaces.push_back(*__b++); + else { + __err |= ios_base::failbit; + return false; + } + } + [[__fallthrough__]]; + case money_base::none: + if (__p != 3) { + while (__b != __e && __ct.is(ctype_base::space, *__b)) + __spaces.push_back(*__b++); + } + break; + case money_base::sign: + if (__psn.size() > 0 && *__b == __psn[0]) { + ++__b; + __neg = false; + if (__psn.size() > 1) + __trailing_sign = std::addressof(__psn); + break; + } + if (__nsn.size() > 0 && *__b == __nsn[0]) { + ++__b; + __neg = true; + if (__nsn.size() > 1) + __trailing_sign = std::addressof(__nsn); + break; + } + if (__psn.size() > 0 && __nsn.size() > 0) { // sign is required + __err |= ios_base::failbit; + return false; + } + if (__psn.size() == 0 && __nsn.size() == 0) + // locale has no way of specifying a sign. Use the initial value of __neg as a default + break; + __neg = (__nsn.size() == 0); + break; + case money_base::symbol: { + bool __more_needed = + __trailing_sign || (__p < 2) || (__p == 2 && __pat.field[3] != static_cast(money_base::none)); + bool __sb = (__flags & ios_base::showbase) != 0; + if (__sb || __more_needed) { + typename string_type::const_iterator __sym_space_end = __sym.begin(); + if (__p > 0 && (__pat.field[__p - 1] == money_base::none || __pat.field[__p - 1] == money_base::space)) { + // Match spaces we've already read against spaces at + // the beginning of __sym. + while (__sym_space_end != __sym.end() && __ct.is(ctype_base::space, *__sym_space_end)) + ++__sym_space_end; + const size_t __num_spaces = __sym_space_end - __sym.begin(); + if (__num_spaces > __spaces.size() || + !std::equal(__spaces.end() - __num_spaces, __spaces.end(), __sym.begin())) { + // No match. Put __sym_space_end back at the + // beginning of __sym, which will prevent a + // match in the next loop. 
+ __sym_space_end = __sym.begin(); + } + } + typename string_type::const_iterator __sym_curr_char = __sym_space_end; + while (__sym_curr_char != __sym.end() && __b != __e && *__b == *__sym_curr_char) { + ++__b; + ++__sym_curr_char; + } + if (__sb && __sym_curr_char != __sym.end()) { + __err |= ios_base::failbit; + return false; + } + } + } break; + case money_base::value: { + unsigned __ng = 0; + for (; __b != __e; ++__b) { + char_type __c = *__b; + if (__ct.is(ctype_base::digit, __c)) { + if (__wn == __we) + std::__double_or_nothing(__wb, __wn, __we); + *__wn++ = __c; + ++__ng; + } else if (__grp.size() > 0 && __ng > 0 && __c == __ts) { + if (__gn == __ge) + std::__double_or_nothing(__gb, __gn, __ge); + *__gn++ = __ng; + __ng = 0; + } else + break; + } + if (__gb.get() != __gn && __ng > 0) { + if (__gn == __ge) + std::__double_or_nothing(__gb, __gn, __ge); + *__gn++ = __ng; + } + if (__fd > 0) { + if (__b == __e || *__b != __dp) { + __err |= ios_base::failbit; + return false; + } + for (++__b; __fd > 0; --__fd, ++__b) { + if (__b == __e || !__ct.is(ctype_base::digit, *__b)) { + __err |= ios_base::failbit; + return false; + } + if (__wn == __we) + std::__double_or_nothing(__wb, __wn, __we); + *__wn++ = *__b; + } + } + if (__wn == __wb.get()) { + __err |= ios_base::failbit; + return false; + } + } break; + } + } + if (__trailing_sign) { + for (unsigned __i = 1; __i < __trailing_sign->size(); ++__i, ++__b) { + if (__b == __e || *__b != (*__trailing_sign)[__i]) { + __err |= ios_base::failbit; + return false; + } + } + } + if (__gb.get() != __gn) { + ios_base::iostate __et = ios_base::goodbit; + __check_grouping(__grp, __gb.get(), __gn, __et); + if (__et) { + __err |= ios_base::failbit; + return false; + } + } + return true; +} + +template +_InputIterator money_get<_CharT, _InputIterator>::do_get( + iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, long double& __v) const { + const int __bz = 100; + char_type __wbuf[__bz]; + unique_ptr __wb(__wbuf, __do_nothing); + char_type* __wn; + char_type* __we = __wbuf + __bz; + locale __loc = __iob.getloc(); + const ctype& __ct = std::use_facet >(__loc); + bool __neg = false; + if (__do_get(__b, __e, __intl, __loc, __iob.flags(), __err, __neg, __ct, __wb, __wn, __we)) { + const char __src[] = "0123456789"; + char_type __atoms[sizeof(__src) - 1]; + __ct.widen(__src, __src + (sizeof(__src) - 1), __atoms); + char __nbuf[__bz]; + char* __nc = __nbuf; + const char* __nc_in = __nc; + unique_ptr __h(nullptr, free); + if (__wn - __wb.get() > __bz - 2) { + __h.reset((char*)malloc(static_cast(__wn - __wb.get() + 2))); + if (__h.get() == nullptr) + std::__throw_bad_alloc(); + __nc = __h.get(); + __nc_in = __nc; + } + if (__neg) + *__nc++ = '-'; + for (const char_type* __w = __wb.get(); __w < __wn; ++__w, ++__nc) + *__nc = __src[std::find(__atoms, std::end(__atoms), *__w) - __atoms]; + *__nc = char(); + if (sscanf(__nc_in, "%Lf", &__v) != 1) + std::__throw_runtime_error("money_get error"); + } + if (__b == __e) + __err |= ios_base::eofbit; + return __b; +} + +template +_InputIterator money_get<_CharT, _InputIterator>::do_get( + iter_type __b, iter_type __e, bool __intl, ios_base& __iob, ios_base::iostate& __err, string_type& __v) const { + const int __bz = 100; + char_type __wbuf[__bz]; + unique_ptr __wb(__wbuf, __do_nothing); + char_type* __wn; + char_type* __we = __wbuf + __bz; + locale __loc = __iob.getloc(); + const ctype& __ct = std::use_facet >(__loc); + bool __neg = false; + if (__do_get(__b, __e, __intl, __loc, 
__iob.flags(), __err, __neg, __ct, __wb, __wn, __we)) { + __v.clear(); + if (__neg) + __v.push_back(__ct.widen('-')); + char_type __z = __ct.widen('0'); + char_type* __w; + for (__w = __wb.get(); __w < __wn - 1; ++__w) + if (*__w != __z) + break; + __v.append(__w, __wn); + } + if (__b == __e) + __err |= ios_base::eofbit; + return __b; +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get; +# endif + +// money_put + +template +class __money_put { +protected: + typedef _CharT char_type; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI __money_put() {} + + static void __gather_info( + bool __intl, + bool __neg, + const locale& __loc, + money_base::pattern& __pat, + char_type& __dp, + char_type& __ts, + string& __grp, + string_type& __sym, + string_type& __sn, + int& __fd); + static void __format( + char_type* __mb, + char_type*& __mi, + char_type*& __me, + ios_base::fmtflags __flags, + const char_type* __db, + const char_type* __de, + const ctype& __ct, + bool __neg, + const money_base::pattern& __pat, + char_type __dp, + char_type __ts, + const string& __grp, + const string_type& __sym, + const string_type& __sn, + int __fd); +}; + +template +void __money_put<_CharT>::__gather_info( + bool __intl, + bool __neg, + const locale& __loc, + money_base::pattern& __pat, + char_type& __dp, + char_type& __ts, + string& __grp, + string_type& __sym, + string_type& __sn, + int& __fd) { + if (__intl) { + const moneypunct& __mp = std::use_facet >(__loc); + if (__neg) { + __pat = __mp.neg_format(); + __sn = __mp.negative_sign(); + } else { + __pat = __mp.pos_format(); + __sn = __mp.positive_sign(); + } + __dp = __mp.decimal_point(); + __ts = __mp.thousands_sep(); + __grp = __mp.grouping(); + __sym = __mp.curr_symbol(); + __fd = __mp.frac_digits(); + } else { + const moneypunct& __mp = std::use_facet >(__loc); + if (__neg) { + __pat = __mp.neg_format(); + __sn = __mp.negative_sign(); + } else { + __pat = __mp.pos_format(); + __sn = __mp.positive_sign(); + } + __dp = __mp.decimal_point(); + __ts = __mp.thousands_sep(); + __grp = __mp.grouping(); + __sym = __mp.curr_symbol(); + __fd = __mp.frac_digits(); + } +} + +template +void __money_put<_CharT>::__format( + char_type* __mb, + char_type*& __mi, + char_type*& __me, + ios_base::fmtflags __flags, + const char_type* __db, + const char_type* __de, + const ctype& __ct, + bool __neg, + const money_base::pattern& __pat, + char_type __dp, + char_type __ts, + const string& __grp, + const string_type& __sym, + const string_type& __sn, + int __fd) { + __me = __mb; + for (char __p : __pat.field) { + switch (__p) { + case money_base::none: + __mi = __me; + break; + case money_base::space: + __mi = __me; + *__me++ = __ct.widen(' '); + break; + case money_base::sign: + if (!__sn.empty()) + *__me++ = __sn[0]; + break; + case money_base::symbol: + if (!__sym.empty() && (__flags & ios_base::showbase)) + __me = std::copy(__sym.begin(), __sym.end(), __me); + break; + case money_base::value: { + // remember start of value so we can reverse it + char_type* __t = __me; + // find beginning of digits + if (__neg) + ++__db; + // find end of digits + const char_type* __d; + for (__d = __db; __d < __de; ++__d) + if (!__ct.is(ctype_base::digit, *__d)) + break; + // print fractional part + if (__fd > 0) { + int __f; + for (__f = __fd; __d > __db && __f > 0; --__f) + *__me++ = *--__d; + char_type __z = __f > 0 ? 
__ct.widen('0') : char_type(); + for (; __f > 0; --__f) + *__me++ = __z; + *__me++ = __dp; + } + // print units part + if (__d == __db) { + *__me++ = __ct.widen('0'); + } else { + unsigned __ng = 0; + unsigned __ig = 0; + unsigned __gl = __grp.empty() ? numeric_limits::max() : static_cast(__grp[__ig]); + while (__d != __db) { + if (__ng == __gl) { + *__me++ = __ts; + __ng = 0; + if (++__ig < __grp.size()) + __gl = __grp[__ig] == numeric_limits::max() + ? numeric_limits::max() + : static_cast(__grp[__ig]); + } + *__me++ = *--__d; + ++__ng; + } + } + // reverse it + std::reverse(__t, __me); + } break; + } + } + // print rest of sign, if any + if (__sn.size() > 1) + __me = std::copy(__sn.begin() + 1, __sn.end(), __me); + // set alignment + if ((__flags & ios_base::adjustfield) == ios_base::left) + __mi = __me; + else if ((__flags & ios_base::adjustfield) != ios_base::internal) + __mi = __mb; +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put; +# endif + +template > +class money_put : public locale::facet, private __money_put<_CharT> { +public: + typedef _CharT char_type; + typedef _OutputIterator iter_type; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI explicit money_put(size_t __refs = 0) : locale::facet(__refs) {} + + _LIBCPP_HIDE_FROM_ABI iter_type + put(iter_type __s, bool __intl, ios_base& __iob, char_type __fl, long double __units) const { + return do_put(__s, __intl, __iob, __fl, __units); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + put(iter_type __s, bool __intl, ios_base& __iob, char_type __fl, const string_type& __digits) const { + return do_put(__s, __intl, __iob, __fl, __digits); + } + + static locale::id id; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~money_put() override {} + + virtual iter_type do_put(iter_type __s, bool __intl, ios_base& __iob, char_type __fl, long double __units) const; + virtual iter_type + do_put(iter_type __s, bool __intl, ios_base& __iob, char_type __fl, const string_type& __digits) const; +}; + +template +locale::id money_put<_CharT, _OutputIterator>::id; + +template +_OutputIterator money_put<_CharT, _OutputIterator>::do_put( + iter_type __s, bool __intl, ios_base& __iob, char_type __fl, long double __units) const { + // convert to char + const size_t __bs = 100; + char __buf[__bs]; + char* __bb = __buf; + char_type __digits[__bs]; + char_type* __db = __digits; + int __n = snprintf(__bb, __bs, "%.0Lf", __units); + unique_ptr __hn(nullptr, free); + unique_ptr __hd(0, free); + // secure memory for digit storage + if (static_cast(__n) > __bs - 1) { + __n = __locale::__asprintf(&__bb, _LIBCPP_GET_C_LOCALE, "%.0Lf", __units); + if (__n == -1) + std::__throw_bad_alloc(); + __hn.reset(__bb); + __hd.reset((char_type*)malloc(static_cast(__n) * sizeof(char_type))); + if (__hd == nullptr) + std::__throw_bad_alloc(); + __db = __hd.get(); + } + // gather info + locale __loc = __iob.getloc(); + const ctype& __ct = std::use_facet >(__loc); + __ct.widen(__bb, __bb + __n, __db); + bool __neg = __n > 0 && __bb[0] == '-'; + money_base::pattern __pat; + char_type __dp; + char_type __ts; + string __grp; + string_type __sym; + string_type __sn; + int __fd; + this->__gather_info(__intl, __neg, __loc, __pat, __dp, __ts, __grp, __sym, __sn, __fd); + // secure memory for formatting + char_type __mbuf[__bs]; + char_type* __mb = __mbuf; + unique_ptr __hw(0, free); + size_t __exn = __n > __fd ? 
(static_cast(__n) - static_cast(__fd)) * 2 + __sn.size() + __sym.size() + + static_cast(__fd) + 1 + : __sn.size() + __sym.size() + static_cast(__fd) + 2; + if (__exn > __bs) { + __hw.reset((char_type*)malloc(__exn * sizeof(char_type))); + __mb = __hw.get(); + if (__mb == 0) + std::__throw_bad_alloc(); + } + // format + char_type* __mi; + char_type* __me; + this->__format( + __mb, __mi, __me, __iob.flags(), __db, __db + __n, __ct, __neg, __pat, __dp, __ts, __grp, __sym, __sn, __fd); + return std::__pad_and_output(__s, __mb, __mi, __me, __iob, __fl); +} + +template +_OutputIterator money_put<_CharT, _OutputIterator>::do_put( + iter_type __s, bool __intl, ios_base& __iob, char_type __fl, const string_type& __digits) const { + // gather info + locale __loc = __iob.getloc(); + const ctype& __ct = std::use_facet >(__loc); + bool __neg = __digits.size() > 0 && __digits[0] == __ct.widen('-'); + money_base::pattern __pat; + char_type __dp; + char_type __ts; + string __grp; + string_type __sym; + string_type __sn; + int __fd; + this->__gather_info(__intl, __neg, __loc, __pat, __dp, __ts, __grp, __sym, __sn, __fd); + // secure memory for formatting + char_type __mbuf[100]; + char_type* __mb = __mbuf; + unique_ptr __h(0, free); + size_t __exn = + static_cast(__digits.size()) > __fd + ? (__digits.size() - static_cast(__fd)) * 2 + __sn.size() + __sym.size() + static_cast(__fd) + + 1 + : __sn.size() + __sym.size() + static_cast(__fd) + 2; + if (__exn > 100) { + __h.reset((char_type*)malloc(__exn * sizeof(char_type))); + __mb = __h.get(); + if (__mb == 0) + std::__throw_bad_alloc(); + } + // format + char_type* __mi; + char_type* __me; + this->__format( + __mb, + __mi, + __me, + __iob.flags(), + __digits.data(), + __digits.data() + __digits.size(), + __ct, + __neg, + __pat, + __dp, + __ts, + __grp, + __sym, + __sn, + __fd); + return std::__pad_and_output(__s, __mb, __mi, __me, __iob, __fl); +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put; +# endif + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_MONEY_H diff --git a/lib/libcxx/include/__locale_dir/num.h b/lib/libcxx/include/__locale_dir/num.h new file mode 100644 index 0000000000..7ca8ffe348 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/num.h @@ -0,0 +1,1072 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_NUM_H +#define _LIBCPP___LOCALE_DIR_NUM_H + +#include <__algorithm/find.h> +#include <__algorithm/reverse.h> +#include <__charconv/to_chars_integral.h> +#include <__charconv/traits.h> +#include <__config> +#include <__iterator/istreambuf_iterator.h> +#include <__iterator/ostreambuf_iterator.h> +#include <__locale_dir/check_grouping.h> +#include <__locale_dir/get_c_locale.h> +#include <__locale_dir/pad_and_output.h> +#include <__locale_dir/scan_keyword.h> +#include <__memory/unique_ptr.h> +#include <__system_error/errc.h> +#include +#include +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +// TODO: Properly qualify calls now that the locale base API defines functions instead of macros +// NOLINTBEGIN(libcpp-robust-against-adl) + +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +struct _LIBCPP_EXPORTED_FROM_ABI __num_get_base { + static const int __num_get_buf_sz = 40; + + static int __get_base(ios_base&); + static const char __src[33]; // "0123456789abcdefABCDEFxX+-pPiInN" + // count of leading characters in __src used for parsing integers ("012..X+-") + static const size_t __int_chr_cnt = 26; + // count of leading characters in __src used for parsing floating-point values ("012..-pP") + static const size_t __fp_chr_cnt = 28; +}; + +template +struct __num_get : protected __num_get_base { + static string __stage2_float_prep(ios_base& __iob, _CharT* __atoms, _CharT& __decimal_point, _CharT& __thousands_sep); + + static int __stage2_float_loop( + _CharT __ct, + bool& __in_units, + char& __exp, + char* __a, + char*& __a_end, + _CharT __decimal_point, + _CharT __thousands_sep, + const string& __grouping, + unsigned* __g, + unsigned*& __g_end, + unsigned& __dc, + _CharT* __atoms); + + [[__deprecated__("This exists only for ABI compatibility")]] static string + __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep); + static int __stage2_int_loop( + _CharT __ct, + int __base, + char* __a, + char*& __a_end, + unsigned& __dc, + _CharT __thousands_sep, + const string& __grouping, + unsigned* __g, + unsigned*& __g_end, + _CharT* __atoms); + + _LIBCPP_HIDE_FROM_ABI static string __stage2_int_prep(ios_base& __iob, _CharT& __thousands_sep) { + locale __loc = __iob.getloc(); + const numpunct<_CharT>& __np = use_facet >(__loc); + __thousands_sep = __np.thousands_sep(); + return __np.grouping(); + } + + _LIBCPP_HIDE_FROM_ABI const _CharT* __do_widen(ios_base& __iob, _CharT* __atoms) const { + return __do_widen_p(__iob, __atoms); + } + +private: + template + _LIBCPP_HIDE_FROM_ABI const _Tp* __do_widen_p(ios_base& __iob, _Tp* __atoms) const { + locale __loc = __iob.getloc(); + use_facet >(__loc).widen(__src, __src + __int_chr_cnt, __atoms); + return __atoms; + } + + _LIBCPP_HIDE_FROM_ABI const char* __do_widen_p(ios_base& __iob, char* __atoms) const { + (void)__iob; + (void)__atoms; + return __src; + } +}; + +template +string __num_get<_CharT>::__stage2_float_prep( + ios_base& __iob, _CharT* __atoms, _CharT& __decimal_point, _CharT& __thousands_sep) { + locale __loc = __iob.getloc(); + std::use_facet >(__loc).widen(__src, __src + __fp_chr_cnt, __atoms); + const numpunct<_CharT>& __np = std::use_facet >(__loc); + __decimal_point = __np.decimal_point(); + __thousands_sep = __np.thousands_sep(); + 
return __np.grouping(); +} + +template +int __num_get<_CharT>::__stage2_int_loop( + _CharT __ct, + int __base, + char* __a, + char*& __a_end, + unsigned& __dc, + _CharT __thousands_sep, + const string& __grouping, + unsigned* __g, + unsigned*& __g_end, + _CharT* __atoms) { + if (__a_end == __a && (__ct == __atoms[24] || __ct == __atoms[25])) { + *__a_end++ = __ct == __atoms[24] ? '+' : '-'; + __dc = 0; + return 0; + } + if (__grouping.size() != 0 && __ct == __thousands_sep) { + if (__g_end - __g < __num_get_buf_sz) { + *__g_end++ = __dc; + __dc = 0; + } + return 0; + } + ptrdiff_t __f = std::find(__atoms, __atoms + __int_chr_cnt, __ct) - __atoms; + if (__f >= 24) + return -1; + switch (__base) { + case 8: + case 10: + if (__f >= __base) + return -1; + break; + case 16: + if (__f < 22) + break; + if (__a_end != __a && __a_end - __a <= 2 && __a_end[-1] == '0') { + __dc = 0; + *__a_end++ = __src[__f]; + return 0; + } + return -1; + } + *__a_end++ = __src[__f]; + ++__dc; + return 0; +} + +template +int __num_get<_CharT>::__stage2_float_loop( + _CharT __ct, + bool& __in_units, + char& __exp, + char* __a, + char*& __a_end, + _CharT __decimal_point, + _CharT __thousands_sep, + const string& __grouping, + unsigned* __g, + unsigned*& __g_end, + unsigned& __dc, + _CharT* __atoms) { + if (__ct == __decimal_point) { + if (!__in_units) + return -1; + __in_units = false; + *__a_end++ = '.'; + if (__grouping.size() != 0 && __g_end - __g < __num_get_buf_sz) + *__g_end++ = __dc; + return 0; + } + if (__ct == __thousands_sep && __grouping.size() != 0) { + if (!__in_units) + return -1; + if (__g_end - __g < __num_get_buf_sz) { + *__g_end++ = __dc; + __dc = 0; + } + return 0; + } + ptrdiff_t __f = std::find(__atoms, __atoms + __num_get_base::__fp_chr_cnt, __ct) - __atoms; + if (__f >= static_cast(__num_get_base::__fp_chr_cnt)) + return -1; + char __x = __src[__f]; + if (__x == '-' || __x == '+') { + if (__a_end == __a || (std::toupper(__a_end[-1]) == std::toupper(__exp))) { + *__a_end++ = __x; + return 0; + } + return -1; + } + if (__x == 'x' || __x == 'X') + __exp = 'P'; + else if (std::toupper(__x) == __exp) { + __exp = std::tolower(__exp); + if (__in_units) { + __in_units = false; + if (__grouping.size() != 0 && __g_end - __g < __num_get_buf_sz) + *__g_end++ = __dc; + } + } + *__a_end++ = __x; + if (__f >= 22) + return 0; + ++__dc; + return 0; +} + +extern template struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get; +# endif + +template +_LIBCPP_HIDE_FROM_ABI _Tp __do_strtod(const char* __a, char** __p2); + +template <> +inline _LIBCPP_HIDE_FROM_ABI float __do_strtod(const char* __a, char** __p2) { + return __locale::__strtof(__a, __p2, _LIBCPP_GET_C_LOCALE); +} + +template <> +inline _LIBCPP_HIDE_FROM_ABI double __do_strtod(const char* __a, char** __p2) { + return __locale::__strtod(__a, __p2, _LIBCPP_GET_C_LOCALE); +} + +template <> +inline _LIBCPP_HIDE_FROM_ABI long double __do_strtod(const char* __a, char** __p2) { + return __locale::__strtold(__a, __p2, _LIBCPP_GET_C_LOCALE); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp __num_get_float(const char* __a, const char* __a_end, ios_base::iostate& __err) { + if (__a != __a_end) { + __libcpp_remove_reference_t __save_errno = errno; + errno = 0; + char* __p2; + _Tp __ld = std::__do_strtod<_Tp>(__a, &__p2); + __libcpp_remove_reference_t __current_errno = errno; + if (__current_errno == 0) + errno = __save_errno; + if (__p2 != __a_end) { + __err = 
ios_base::failbit; + return 0; + } else if (__current_errno == ERANGE) + __err = ios_base::failbit; + return __ld; + } + __err = ios_base::failbit; + return 0; +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__num_get_signed_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) { + if (__a != __a_end) { + __libcpp_remove_reference_t __save_errno = errno; + errno = 0; + char* __p2; + long long __ll = __locale::__strtoll(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE); + __libcpp_remove_reference_t __current_errno = errno; + if (__current_errno == 0) + errno = __save_errno; + if (__p2 != __a_end) { + __err = ios_base::failbit; + return 0; + } else if (__current_errno == ERANGE || __ll < numeric_limits<_Tp>::min() || numeric_limits<_Tp>::max() < __ll) { + __err = ios_base::failbit; + if (__ll > 0) + return numeric_limits<_Tp>::max(); + else + return numeric_limits<_Tp>::min(); + } + return static_cast<_Tp>(__ll); + } + __err = ios_base::failbit; + return 0; +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__num_get_unsigned_integral(const char* __a, const char* __a_end, ios_base::iostate& __err, int __base) { + if (__a != __a_end) { + const bool __negate = *__a == '-'; + if (__negate && ++__a == __a_end) { + __err = ios_base::failbit; + return 0; + } + __libcpp_remove_reference_t __save_errno = errno; + errno = 0; + char* __p2; + unsigned long long __ll = __locale::__strtoull(__a, &__p2, __base, _LIBCPP_GET_C_LOCALE); + __libcpp_remove_reference_t __current_errno = errno; + if (__current_errno == 0) + errno = __save_errno; + if (__p2 != __a_end) { + __err = ios_base::failbit; + return 0; + } else if (__current_errno == ERANGE || numeric_limits<_Tp>::max() < __ll) { + __err = ios_base::failbit; + return numeric_limits<_Tp>::max(); + } + _Tp __res = static_cast<_Tp>(__ll); + if (__negate) + __res = -__res; + return __res; + } + __err = ios_base::failbit; + return 0; +} + +template > +class num_get : public locale::facet, private __num_get<_CharT> { +public: + typedef _CharT char_type; + typedef _InputIterator iter_type; + + _LIBCPP_HIDE_FROM_ABI explicit num_get(size_t __refs = 0) : locale::facet(__refs) {} + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type 
__e, ios_base& __iob, ios_base::iostate& __err, float& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, double& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long double& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const { + return do_get(__b, __e, __iob, __err, __v); + } + + static locale::id id; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~num_get() override {} + + template + _LIBCPP_HIDE_FROM_ABI iter_type + __do_get_floating_point(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Fp& __v) const { + // Stage 1, nothing to do + // Stage 2 + char_type __atoms[__num_get_base::__fp_chr_cnt]; + char_type __decimal_point; + char_type __thousands_sep; + string __grouping = this->__stage2_float_prep(__iob, __atoms, __decimal_point, __thousands_sep); + string __buf; + __buf.resize(__buf.capacity()); + char* __a = &__buf[0]; + char* __a_end = __a; + unsigned __g[__num_get_base::__num_get_buf_sz]; + unsigned* __g_end = __g; + unsigned __dc = 0; + bool __in_units = true; + char __exp = 'E'; + bool __is_leading_parsed = false; + for (; __b != __e; ++__b) { + if (__a_end == __a + __buf.size()) { + size_t __tmp = __buf.size(); + __buf.resize(2 * __buf.size()); + __buf.resize(__buf.capacity()); + __a = &__buf[0]; + __a_end = __a + __tmp; + } + if (this->__stage2_float_loop( + *__b, + __in_units, + __exp, + __a, + __a_end, + __decimal_point, + __thousands_sep, + __grouping, + __g, + __g_end, + __dc, + __atoms)) + break; + + // the leading character excluding the sign must be a decimal digit + if (!__is_leading_parsed) { + if (__a_end - __a >= 1 && __a[0] != '-' && __a[0] != '+') { + if (('0' <= __a[0] && __a[0] <= '9') || __a[0] == '.') + __is_leading_parsed = true; + else + break; + } else if (__a_end - __a >= 2 && (__a[0] == '-' || __a[0] == '+')) { + if (('0' <= __a[1] && __a[1] <= '9') || __a[1] == '.') + __is_leading_parsed = true; + else + break; + } + } + } + if (__grouping.size() != 0 && __in_units && __g_end - __g < __num_get_base::__num_get_buf_sz) + *__g_end++ = __dc; + // Stage 3 + __v = std::__num_get_float<_Fp>(__a, __a_end, __err); + // Digit grouping checked + __check_grouping(__grouping, __g, __g_end, __err); + // EOF checked + if (__b == __e) + __err |= ios_base::eofbit; + return __b; + } + + template + _LIBCPP_HIDE_FROM_ABI iter_type + __do_get_signed(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Signed& __v) const { + // Stage 1 + int __base = this->__get_base(__iob); + // Stage 2 + char_type __thousands_sep; + const int __atoms_size = __num_get_base::__int_chr_cnt; + char_type __atoms1[__atoms_size]; + const char_type* __atoms = this->__do_widen(__iob, __atoms1); + string __grouping = this->__stage2_int_prep(__iob, __thousands_sep); + string __buf; + __buf.resize(__buf.capacity()); + char* __a = &__buf[0]; + char* __a_end = __a; + unsigned __g[__num_get_base::__num_get_buf_sz]; + unsigned* __g_end = __g; + unsigned __dc = 0; + for (; __b != __e; ++__b) { + if (__a_end == __a + __buf.size()) { + size_t __tmp = __buf.size(); + __buf.resize(2 * __buf.size()); + __buf.resize(__buf.capacity()); + __a = &__buf[0]; + __a_end = __a + 
__tmp; + } + if (this->__stage2_int_loop( + *__b, + __base, + __a, + __a_end, + __dc, + __thousands_sep, + __grouping, + __g, + __g_end, + const_cast(__atoms))) + break; + } + if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz) + *__g_end++ = __dc; + // Stage 3 + __v = std::__num_get_signed_integral<_Signed>(__a, __a_end, __err, __base); + // Digit grouping checked + __check_grouping(__grouping, __g, __g_end, __err); + // EOF checked + if (__b == __e) + __err |= ios_base::eofbit; + return __b; + } + + template + _LIBCPP_HIDE_FROM_ABI iter_type + __do_get_unsigned(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, _Unsigned& __v) const { + // Stage 1 + int __base = this->__get_base(__iob); + // Stage 2 + char_type __thousands_sep; + const int __atoms_size = __num_get_base::__int_chr_cnt; + char_type __atoms1[__atoms_size]; + const char_type* __atoms = this->__do_widen(__iob, __atoms1); + string __grouping = this->__stage2_int_prep(__iob, __thousands_sep); + string __buf; + __buf.resize(__buf.capacity()); + char* __a = &__buf[0]; + char* __a_end = __a; + unsigned __g[__num_get_base::__num_get_buf_sz]; + unsigned* __g_end = __g; + unsigned __dc = 0; + for (; __b != __e; ++__b) { + if (__a_end == __a + __buf.size()) { + size_t __tmp = __buf.size(); + __buf.resize(2 * __buf.size()); + __buf.resize(__buf.capacity()); + __a = &__buf[0]; + __a_end = __a + __tmp; + } + if (this->__stage2_int_loop( + *__b, + __base, + __a, + __a_end, + __dc, + __thousands_sep, + __grouping, + __g, + __g_end, + const_cast(__atoms))) + break; + } + if (__grouping.size() != 0 && __g_end - __g < __num_get_base::__num_get_buf_sz) + *__g_end++ = __dc; + // Stage 3 + __v = std::__num_get_unsigned_integral<_Unsigned>(__a, __a_end, __err, __base); + // Digit grouping checked + __check_grouping(__grouping, __g, __g_end, __err); + // EOF checked + if (__b == __e) + __err |= ios_base::eofbit; + return __b; + } + + virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const; + + virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long& __v) const { + return this->__do_get_signed(__b, __e, __iob, __err, __v); + } + + virtual iter_type + do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long long& __v) const { + return this->__do_get_signed(__b, __e, __iob, __err, __v); + } + + virtual iter_type + do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned short& __v) const { + return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + } + + virtual iter_type + do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned int& __v) const { + return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + } + + virtual iter_type + do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long& __v) const { + return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + } + + virtual iter_type + do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, unsigned long long& __v) const { + return this->__do_get_unsigned(__b, __e, __iob, __err, __v); + } + + virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, float& __v) const { + return this->__do_get_floating_point(__b, __e, __iob, __err, __v); + } + + virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, 
double& __v) const { + return this->__do_get_floating_point(__b, __e, __iob, __err, __v); + } + + virtual iter_type + do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, long double& __v) const { + return this->__do_get_floating_point(__b, __e, __iob, __err, __v); + } + + virtual iter_type do_get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const; +}; + +template +locale::id num_get<_CharT, _InputIterator>::id; + +template +_InputIterator num_get<_CharT, _InputIterator>::do_get( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, bool& __v) const { + if ((__iob.flags() & ios_base::boolalpha) == 0) { + long __lv = -1; + __b = do_get(__b, __e, __iob, __err, __lv); + switch (__lv) { + case 0: + __v = false; + break; + case 1: + __v = true; + break; + default: + __v = true; + __err = ios_base::failbit; + break; + } + return __b; + } + const ctype<_CharT>& __ct = std::use_facet >(__iob.getloc()); + const numpunct<_CharT>& __np = std::use_facet >(__iob.getloc()); + typedef typename numpunct<_CharT>::string_type string_type; + const string_type __names[2] = {__np.truename(), __np.falsename()}; + const string_type* __i = std::__scan_keyword(__b, __e, __names, __names + 2, __ct, __err); + __v = __i == __names; + return __b; +} + +template +_InputIterator num_get<_CharT, _InputIterator>::do_get( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, void*& __v) const { + // Stage 1 + int __base = 16; + // Stage 2 + char_type __atoms[__num_get_base::__int_chr_cnt]; + char_type __thousands_sep = char_type(); + string __grouping; + std::use_facet >(__iob.getloc()) + .widen(__num_get_base::__src, __num_get_base::__src + __num_get_base::__int_chr_cnt, __atoms); + string __buf; + __buf.resize(__buf.capacity()); + char* __a = &__buf[0]; + char* __a_end = __a; + unsigned __g[__num_get_base::__num_get_buf_sz]; + unsigned* __g_end = __g; + unsigned __dc = 0; + for (; __b != __e; ++__b) { + if (__a_end == __a + __buf.size()) { + size_t __tmp = __buf.size(); + __buf.resize(2 * __buf.size()); + __buf.resize(__buf.capacity()); + __a = &__buf[0]; + __a_end = __a + __tmp; + } + if (this->__stage2_int_loop(*__b, __base, __a, __a_end, __dc, __thousands_sep, __grouping, __g, __g_end, __atoms)) + break; + } + // Stage 3 + __buf.resize(__a_end - __a); + if (__locale::__sscanf(__buf.c_str(), _LIBCPP_GET_C_LOCALE, "%p", &__v) != 1) + __err = ios_base::failbit; + // EOF checked + if (__b == __e) + __err |= ios_base::eofbit; + return __b; +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get; +# endif + +struct _LIBCPP_EXPORTED_FROM_ABI __num_put_base { +protected: + static void __format_int(char* __fmt, const char* __len, bool __signd, ios_base::fmtflags __flags); + static bool __format_float(char* __fmt, const char* __len, ios_base::fmtflags __flags); + static char* __identify_padding(char* __nb, char* __ne, const ios_base& __iob); +}; + +template +struct __num_put : protected __num_put_base { + static void __widen_and_group_int( + char* __nb, char* __np, char* __ne, _CharT* __ob, _CharT*& __op, _CharT*& __oe, const locale& __loc); + static void __widen_and_group_float( + char* __nb, char* __np, char* __ne, _CharT* __ob, _CharT*& __op, _CharT*& __oe, const locale& __loc); +}; + +template +void __num_put<_CharT>::__widen_and_group_int( + char* __nb, char* __np, char* __ne, _CharT* __ob, _CharT*& __op, 
_CharT*& __oe, const locale& __loc) { + const ctype<_CharT>& __ct = std::use_facet >(__loc); + const numpunct<_CharT>& __npt = std::use_facet >(__loc); + string __grouping = __npt.grouping(); + if (__grouping.empty()) { + __ct.widen(__nb, __ne, __ob); + __oe = __ob + (__ne - __nb); + } else { + __oe = __ob; + char* __nf = __nb; + if (*__nf == '-' || *__nf == '+') + *__oe++ = __ct.widen(*__nf++); + if (__ne - __nf >= 2 && __nf[0] == '0' && (__nf[1] == 'x' || __nf[1] == 'X')) { + *__oe++ = __ct.widen(*__nf++); + *__oe++ = __ct.widen(*__nf++); + } + std::reverse(__nf, __ne); + _CharT __thousands_sep = __npt.thousands_sep(); + unsigned __dc = 0; + unsigned __dg = 0; + for (char* __p = __nf; __p < __ne; ++__p) { + if (static_cast(__grouping[__dg]) > 0 && __dc == static_cast(__grouping[__dg])) { + *__oe++ = __thousands_sep; + __dc = 0; + if (__dg < __grouping.size() - 1) + ++__dg; + } + *__oe++ = __ct.widen(*__p); + ++__dc; + } + std::reverse(__ob + (__nf - __nb), __oe); + } + if (__np == __ne) + __op = __oe; + else + __op = __ob + (__np - __nb); +} + +template +void __num_put<_CharT>::__widen_and_group_float( + char* __nb, char* __np, char* __ne, _CharT* __ob, _CharT*& __op, _CharT*& __oe, const locale& __loc) { + const ctype<_CharT>& __ct = std::use_facet >(__loc); + const numpunct<_CharT>& __npt = std::use_facet >(__loc); + string __grouping = __npt.grouping(); + __oe = __ob; + char* __nf = __nb; + if (*__nf == '-' || *__nf == '+') + *__oe++ = __ct.widen(*__nf++); + char* __ns; + if (__ne - __nf >= 2 && __nf[0] == '0' && (__nf[1] == 'x' || __nf[1] == 'X')) { + *__oe++ = __ct.widen(*__nf++); + *__oe++ = __ct.widen(*__nf++); + for (__ns = __nf; __ns < __ne; ++__ns) + if (!__locale::__isxdigit(*__ns, _LIBCPP_GET_C_LOCALE)) + break; + } else { + for (__ns = __nf; __ns < __ne; ++__ns) + if (!__locale::__isdigit(*__ns, _LIBCPP_GET_C_LOCALE)) + break; + } + if (__grouping.empty()) { + __ct.widen(__nf, __ns, __oe); + __oe += __ns - __nf; + } else { + std::reverse(__nf, __ns); + _CharT __thousands_sep = __npt.thousands_sep(); + unsigned __dc = 0; + unsigned __dg = 0; + for (char* __p = __nf; __p < __ns; ++__p) { + if (__grouping[__dg] > 0 && __dc == static_cast(__grouping[__dg])) { + *__oe++ = __thousands_sep; + __dc = 0; + if (__dg < __grouping.size() - 1) + ++__dg; + } + *__oe++ = __ct.widen(*__p); + ++__dc; + } + std::reverse(__ob + (__nf - __nb), __oe); + } + for (__nf = __ns; __nf < __ne; ++__nf) { + if (*__nf == '.') { + *__oe++ = __npt.decimal_point(); + ++__nf; + break; + } else + *__oe++ = __ct.widen(*__nf); + } + __ct.widen(__nf, __ne, __oe); + __oe += __ne - __nf; + if (__np == __ne) + __op = __oe; + else + __op = __ob + (__np - __nb); +} + +extern template struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put; +# endif + +template > +class num_put : public locale::facet, private __num_put<_CharT> { +public: + typedef _CharT char_type; + typedef _OutputIterator iter_type; + + _LIBCPP_HIDE_FROM_ABI explicit num_put(size_t __refs = 0) : locale::facet(__refs) {} + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, bool __v) const { + return do_put(__s, __iob, __fl, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, long __v) const { + return do_put(__s, __iob, __fl, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, long long __v) const { + return do_put(__s, __iob, 
__fl, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long __v) const { + return do_put(__s, __iob, __fl, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long long __v) const { + return do_put(__s, __iob, __fl, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, double __v) const { + return do_put(__s, __iob, __fl, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, long double __v) const { + return do_put(__s, __iob, __fl, __v); + } + + _LIBCPP_HIDE_FROM_ABI iter_type put(iter_type __s, ios_base& __iob, char_type __fl, const void* __v) const { + return do_put(__s, __iob, __fl, __v); + } + + static locale::id id; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~num_put() override {} + + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, bool __v) const; + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, long __v) const; + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, long long __v) const; + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long) const; + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long long) const; + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, double __v) const; + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, long double __v) const; + virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, const void* __v) const; + + template + _LIBCPP_HIDE_FROM_ABI inline _OutputIterator + __do_put_integral(iter_type __s, ios_base& __iob, char_type __fl, _Integral __v) const; + + template + _LIBCPP_HIDE_FROM_ABI inline _OutputIterator + __do_put_floating_point(iter_type __s, ios_base& __iob, char_type __fl, _Float __v, char const* __len) const; +}; + +template +locale::id num_put<_CharT, _OutputIterator>::id; + +template +_OutputIterator +num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, bool __v) const { + if ((__iob.flags() & ios_base::boolalpha) == 0) + return do_put(__s, __iob, __fl, (unsigned long)__v); + const numpunct& __np = std::use_facet >(__iob.getloc()); + typedef typename numpunct::string_type string_type; + string_type __nm = __v ? __np.truename() : __np.falsename(); + for (typename string_type::iterator __i = __nm.begin(); __i != __nm.end(); ++__i, ++__s) + *__s = *__i; + return __s; +} + +template +template +_LIBCPP_HIDE_FROM_ABI inline _OutputIterator num_put<_CharT, _OutputIterator>::__do_put_integral( + iter_type __s, ios_base& __iob, char_type __fl, _Integral __v) const { + // Stage 1 - Get number in narrow char + + // Worst case is octal, with showbase enabled. Note that octal is always + // printed as an unsigned value. 
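+  // For a 64-bit unsigned type that is 64 / 3 = 21 digits, + 1 for the leftover
+  // bit (64 % 3 != 0), + 2 for the "0" octal prefix and the terminating null
+  // character, i.e. 24 chars in total.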
+ using _Unsigned = typename make_unsigned<_Integral>::type; + _LIBCPP_CONSTEXPR const unsigned __buffer_size = + (numeric_limits<_Unsigned>::digits / 3) // 1 char per 3 bits + + ((numeric_limits<_Unsigned>::digits % 3) != 0) // round up + + 2; // base prefix + terminating null character + + char __char_buffer[__buffer_size]; + char* __buffer_ptr = __char_buffer; + + auto __flags = __iob.flags(); + + auto __basefield = (__flags & ios_base::basefield); + + // Extract base + int __base = 10; + if (__basefield == ios_base::oct) + __base = 8; + else if (__basefield == ios_base::hex) + __base = 16; + + // Print '-' and make the argument unsigned + auto __uval = std::__to_unsigned_like(__v); + if (__basefield != ios_base::oct && __basefield != ios_base::hex && __v < 0) { + *__buffer_ptr++ = '-'; + __uval = std::__complement(__uval); + } + + // Maybe add '+' prefix + if (std::is_signed<_Integral>::value && (__flags & ios_base::showpos) && __basefield != ios_base::oct && + __basefield != ios_base::hex && __v >= 0) + *__buffer_ptr++ = '+'; + + // Add base prefix + if (__v != 0 && __flags & ios_base::showbase) { + if (__basefield == ios_base::oct) { + *__buffer_ptr++ = '0'; + } else if (__basefield == ios_base::hex) { + *__buffer_ptr++ = '0'; + *__buffer_ptr++ = (__flags & ios_base::uppercase ? 'X' : 'x'); + } + } + + auto __res = std::__to_chars_integral(__buffer_ptr, __char_buffer + __buffer_size, __uval, __base); + _LIBCPP_ASSERT_INTERNAL(__res.__ec == std::errc(0), "to_chars: invalid maximum buffer size computed?"); + + // Make letters uppercase + if (__flags & ios_base::hex && __flags & ios_base::uppercase) { + for (; __buffer_ptr != __res.__ptr; ++__buffer_ptr) + *__buffer_ptr = std::__hex_to_upper(*__buffer_ptr); + } + + char* __np = this->__identify_padding(__char_buffer, __res.__ptr, __iob); + // Stage 2 - Widen __nar while adding thousands separators + char_type __o[2 * (__buffer_size - 1) - 1]; + char_type* __op; // pad here + char_type* __oe; // end of output + this->__widen_and_group_int(__char_buffer, __np, __res.__ptr, __o, __op, __oe, __iob.getloc()); + // [__o, __oe) contains thousands_sep'd wide number + // Stage 3 & 4 + return std::__pad_and_output(__s, __o, __op, __oe, __iob, __fl); +} + +template +_OutputIterator +num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long __v) const { + return this->__do_put_integral(__s, __iob, __fl, __v); +} + +template +_OutputIterator +num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long long __v) const { + return this->__do_put_integral(__s, __iob, __fl, __v); +} + +template +_OutputIterator +num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long __v) const { + return this->__do_put_integral(__s, __iob, __fl, __v); +} + +template +_OutputIterator +num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long long __v) const { + return this->__do_put_integral(__s, __iob, __fl, __v); +} + +template +template +_LIBCPP_HIDE_FROM_ABI inline _OutputIterator num_put<_CharT, _OutputIterator>::__do_put_floating_point( + iter_type __s, ios_base& __iob, char_type __fl, _Float __v, char const* __len) const { + // Stage 1 - Get number in narrow char + char __fmt[8] = {'%', 0}; + bool __specify_precision = this->__format_float(__fmt + 1, __len, __iob.flags()); + const unsigned __nbuf = 30; + char __nar[__nbuf]; + char* __nb = __nar; + int __nc; + _LIBCPP_DIAGNOSTIC_PUSH + 
+  _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wformat-nonliteral")
+  _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral")
+  if (__specify_precision)
+    __nc = __locale::__snprintf(__nb, __nbuf, _LIBCPP_GET_C_LOCALE, __fmt, (int)__iob.precision(), __v);
+  else
+    __nc = __locale::__snprintf(__nb, __nbuf, _LIBCPP_GET_C_LOCALE, __fmt, __v);
+  unique_ptr<char, void (*)(void*)> __nbh(nullptr, free);
+  if (__nc > static_cast<int>(__nbuf - 1)) {
+    if (__specify_precision)
+      __nc = __locale::__asprintf(&__nb, _LIBCPP_GET_C_LOCALE, __fmt, (int)__iob.precision(), __v);
+    else
+      __nc = __locale::__asprintf(&__nb, _LIBCPP_GET_C_LOCALE, __fmt, __v);
+    if (__nc == -1)
+      std::__throw_bad_alloc();
+    __nbh.reset(__nb);
+  }
+  _LIBCPP_DIAGNOSTIC_POP
+  char* __ne = __nb + __nc;
+  char* __np = this->__identify_padding(__nb, __ne, __iob);
+  // Stage 2 - Widen __nar while adding thousands separators
+  char_type __o[2 * (__nbuf - 1) - 1];
+  char_type* __ob = __o;
+  unique_ptr<char_type, void (*)(void*)> __obh(0, free);
+  if (__nb != __nar) {
+    __ob = (char_type*)malloc(2 * static_cast<size_t>(__nc) * sizeof(char_type));
+    if (__ob == 0)
+      std::__throw_bad_alloc();
+    __obh.reset(__ob);
+  }
+  char_type* __op; // pad here
+  char_type* __oe; // end of output
+  this->__widen_and_group_float(__nb, __np, __ne, __ob, __op, __oe, __iob.getloc());
+  // [__o, __oe) contains thousands_sep'd wide number
+  // Stage 3 & 4
+  __s = std::__pad_and_output(__s, __ob, __op, __oe, __iob, __fl);
+  return __s;
+}
+
+template <class _CharT, class _OutputIterator>
+_OutputIterator
+num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, double __v) const {
+  return this->__do_put_floating_point(__s, __iob, __fl, __v, "");
+}
+
+template <class _CharT, class _OutputIterator>
+_OutputIterator
+num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long double __v) const {
+  return this->__do_put_floating_point(__s, __iob, __fl, __v, "L");
+}
+
+template <class _CharT, class _OutputIterator>
+_OutputIterator
+num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, const void* __v) const {
+  auto __flags = __iob.flags();
+  __iob.flags((__flags & ~ios_base::basefield & ~ios_base::uppercase) | ios_base::hex | ios_base::showbase);
+  auto __res = __do_put_integral(__s, __iob, __fl, reinterpret_cast<uintptr_t>(__v));
+  __iob.flags(__flags);
+  return __res;
+}
+
+extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put<char>;
+# if _LIBCPP_HAS_WIDE_CHARACTERS
+extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put<wchar_t>;
+# endif
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+// NOLINTEND(libcpp-robust-against-adl)
+
+#endif // _LIBCPP_HAS_LOCALIZATION
+
+#endif // _LIBCPP___LOCALE_DIR_NUM_H
diff --git a/lib/libcxx/include/__locale_dir/scan_keyword.h b/lib/libcxx/include/__locale_dir/scan_keyword.h
new file mode 100644
index 0000000000..78dd0a46a8
--- /dev/null
+++ b/lib/libcxx/include/__locale_dir/scan_keyword.h
@@ -0,0 +1,143 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H +#define _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H + +#include <__config> +#include <__memory/unique_ptr.h> +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// __scan_keyword +// Scans [__b, __e) until a match is found in the basic_strings range +// [__kb, __ke) or until it can be shown that there is no match in [__kb, __ke). +// __b will be incremented (visibly), consuming CharT until a match is found +// or proved to not exist. A keyword may be "", in which will match anything. +// If one keyword is a prefix of another, and the next CharT in the input +// might match another keyword, the algorithm will attempt to find the longest +// matching keyword. If the longer matching keyword ends up not matching, then +// no keyword match is found. If no keyword match is found, __ke is returned +// and failbit is set in __err. +// Else an iterator pointing to the matching keyword is found. If more than +// one keyword matches, an iterator to the first matching keyword is returned. +// If on exit __b == __e, eofbit is set in __err. If __case_sensitive is false, +// __ct is used to force to lower case before comparing characters. +// Examples: +// Keywords: "a", "abb" +// If the input is "a", the first keyword matches and eofbit is set. +// If the input is "abc", no match is found and "ab" are consumed. +template +_LIBCPP_HIDE_FROM_ABI _ForwardIterator __scan_keyword( + _InputIterator& __b, + _InputIterator __e, + _ForwardIterator __kb, + _ForwardIterator __ke, + const _Ctype& __ct, + ios_base::iostate& __err, + bool __case_sensitive = true) { + typedef typename iterator_traits<_InputIterator>::value_type _CharT; + size_t __nkw = static_cast(std::distance(__kb, __ke)); + const unsigned char __doesnt_match = '\0'; + const unsigned char __might_match = '\1'; + const unsigned char __does_match = '\2'; + unsigned char __statbuf[100]; + unsigned char* __status = __statbuf; + unique_ptr __stat_hold(nullptr, free); + if (__nkw > sizeof(__statbuf)) { + __status = (unsigned char*)malloc(__nkw); + if (__status == nullptr) + std::__throw_bad_alloc(); + __stat_hold.reset(__status); + } + size_t __n_might_match = __nkw; // At this point, any keyword might match + size_t __n_does_match = 0; // but none of them definitely do + // Initialize all statuses to __might_match, except for "" keywords are __does_match + unsigned char* __st = __status; + for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) { + if (!__ky->empty()) + *__st = __might_match; + else { + *__st = __does_match; + --__n_might_match; + ++__n_does_match; + } + } + // While there might be a match, test keywords against the next CharT + for (size_t __indx = 0; __b != __e && __n_might_match > 0; ++__indx) { + // Peek at the next CharT but don't consume it + _CharT __c = *__b; + if (!__case_sensitive) + __c = __ct.toupper(__c); + bool __consume = false; + // For each keyword which might match, see if the __indx character is __c + // If a match if found, consume __c + // If a match is found, and that is the last character in the keyword, + // then that keyword matches. 
+ // If the keyword doesn't match this character, then change the keyword + // to doesn't match + __st = __status; + for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) { + if (*__st == __might_match) { + _CharT __kc = (*__ky)[__indx]; + if (!__case_sensitive) + __kc = __ct.toupper(__kc); + if (__c == __kc) { + __consume = true; + if (__ky->size() == __indx + 1) { + *__st = __does_match; + --__n_might_match; + ++__n_does_match; + } + } else { + *__st = __doesnt_match; + --__n_might_match; + } + } + } + // consume if we matched a character + if (__consume) { + ++__b; + // If we consumed a character and there might be a matched keyword that + // was marked matched on a previous iteration, then such keywords + // which are now marked as not matching. + if (__n_might_match + __n_does_match > 1) { + __st = __status; + for (_ForwardIterator __ky = __kb; __ky != __ke; ++__ky, (void)++__st) { + if (*__st == __does_match && __ky->size() != __indx + 1) { + *__st = __doesnt_match; + --__n_does_match; + } + } + } + } + } + // We've exited the loop because we hit eof and/or we have no more "might matches". + if (__b == __e) + __err |= ios_base::eofbit; + // Return the first matching result + for (__st = __status; __kb != __ke; ++__kb, (void)++__st) + if (*__st == __does_match) + break; + if (__kb == __ke) + __err |= ios_base::failbit; + return __kb; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_SCAN_KEYWORD_H diff --git a/lib/libcxx/include/__locale_dir/support/apple.h b/lib/libcxx/include/__locale_dir/support/apple.h index 5216ed2ba7..62eb79c30d 100644 --- a/lib/libcxx/include/__locale_dir/support/apple.h +++ b/lib/libcxx/include/__locale_dir/support/apple.h @@ -15,8 +15,6 @@ # pragma GCC system_header #endif -#include - #include <__locale_dir/support/bsd_like.h> #endif // _LIBCPP___LOCALE_DIR_SUPPORT_APPLE_H diff --git a/lib/libcxx/include/__locale_dir/support/bsd_like.h b/lib/libcxx/include/__locale_dir/support/bsd_like.h index 51b37e5861..2b03e18920 100644 --- a/lib/libcxx/include/__locale_dir/support/bsd_like.h +++ b/lib/libcxx/include/__locale_dir/support/bsd_like.h @@ -24,6 +24,11 @@ # include #endif +/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */ +#if __has_include() +# include +#endif + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif @@ -43,9 +48,9 @@ namespace __locale { #define _LIBCPP_ALL_MASK LC_ALL_MASK #define _LIBCPP_LC_ALL LC_ALL -using __locale_t = ::locale_t; +using __locale_t _LIBCPP_NODEBUG = ::locale_t; #if defined(_LIBCPP_BUILDING_LIBRARY) -using __lconv_t = std::lconv; +using __lconv_t _LIBCPP_NODEBUG = std::lconv; inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __locale, __locale_t __base) { return ::newlocale(__category_mask, __locale, __base); @@ -87,12 +92,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { // // Character manipulation functions // -#if defined(_LIBCPP_BUILDING_LIBRARY) -inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return ::islower_l(__c, __loc); } - -inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return ::isupper_l(__c, __loc); } -#endif - inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return ::isdigit_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return ::isxdigit_l(__c, __loc); } diff --git 
a/lib/libcxx/include/__locale_dir/support/freebsd.h b/lib/libcxx/include/__locale_dir/support/freebsd.h index 5e24cbd29b..5c6e21e387 100644 --- a/lib/libcxx/include/__locale_dir/support/freebsd.h +++ b/lib/libcxx/include/__locale_dir/support/freebsd.h @@ -15,8 +15,6 @@ # pragma GCC system_header #endif -#include - #include <__locale_dir/support/bsd_like.h> #endif // _LIBCPP___LOCALE_DIR_SUPPORT_FREEBSD_H diff --git a/lib/libcxx/include/__locale_dir/support/fuchsia.h b/lib/libcxx/include/__locale_dir/support/fuchsia.h index fb9de74ab7..4b9e63facb 100644 --- a/lib/libcxx/include/__locale_dir/support/fuchsia.h +++ b/lib/libcxx/include/__locale_dir/support/fuchsia.h @@ -49,10 +49,10 @@ struct __locale_guard { #define _LIBCPP_ALL_MASK LC_ALL_MASK #define _LIBCPP_LC_ALL LC_ALL -using __locale_t = locale_t; +using __locale_t _LIBCPP_NODEBUG = locale_t; #if defined(_LIBCPP_BUILDING_LIBRARY) -using __lconv_t = std::lconv; +using __lconv_t _LIBCPP_NODEBUG = std::lconv; inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __name, __locale_t __loc) { return ::newlocale(__category_mask, __name, __loc); diff --git a/lib/libcxx/include/__locale_dir/support/linux.h b/lib/libcxx/include/__locale_dir/support/linux.h new file mode 100644 index 0000000000..23bcf44c31 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/support/linux.h @@ -0,0 +1,281 @@ +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_LINUX_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_LINUX_H + +#include <__config> +#include <__cstddef/size_t.h> +#include <__std_mbstate_t.h> +#include <__utility/forward.h> +#include // std::lconv +#include +#include +#include +#include +#include +#include +#if _LIBCPP_HAS_WIDE_CHARACTERS +# include +# include +#endif + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { + +struct __locale_guard { + _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(::uselocale(__loc)) {} + + _LIBCPP_HIDE_FROM_ABI ~__locale_guard() { + if (__old_loc_) + ::uselocale(__old_loc_); + } + + locale_t __old_loc_; + + __locale_guard(__locale_guard const&) = delete; + __locale_guard& operator=(__locale_guard const&) = delete; +}; + +// +// Locale management +// +#define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK +#define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK +#define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK +#define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK +#define _LIBCPP_TIME_MASK LC_TIME_MASK +#define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK +#define _LIBCPP_ALL_MASK LC_ALL_MASK +#define _LIBCPP_LC_ALL LC_ALL + +using __locale_t _LIBCPP_NODEBUG = ::locale_t; + +#if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t _LIBCPP_NODEBUG = std::lconv; + +inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __locale, __locale_t __base) { + return ::newlocale(__category_mask, __locale, __base); +} + +inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { ::freelocale(__loc); } + +inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __locale) { + return ::setlocale(__category, __locale); +} + +inline 
_LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { + __locale_guard __current(__loc); + return std::localeconv(); +} +#endif // _LIBCPP_BUILDING_LIBRARY + +// +// Strtonum functions +// +inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtof_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtod_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtold_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC + return ::strtoll_l(__nptr, __endptr, __base, __loc); +#else + (void)__loc; + return ::strtoll(__nptr, __endptr, __base); +#endif +} + +inline _LIBCPP_HIDE_FROM_ABI unsigned long long +__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { +#if !_LIBCPP_HAS_MUSL_LIBC + return ::strtoull_l(__nptr, __endptr, __base, __loc); +#else + (void)__loc; + return ::strtoull(__nptr, __endptr, __base); +#endif +} + +// +// Character manipulation functions +// +inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return isdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return isxdigit_l(__c, __loc); } + +#if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return toupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __loc) { return tolower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) { + return strcoll_l(__s1, __s2, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __n, __locale_t __loc) { + return strxfrm_l(__dest, __src, __n, __loc); +} + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) { + return iswctype_l(__c, __type, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __c, __locale_t __loc) { return iswspace_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __c, __locale_t __loc) { return iswprint_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __c, __locale_t __loc) { return iswcntrl_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __c, __locale_t __loc) { return iswupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __c, __locale_t __loc) { return iswlower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __c, __locale_t __loc) { return iswalpha_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __c, __locale_t __loc) { return iswblank_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __c, __locale_t __loc) { return iswdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __c, __locale_t __loc) { return iswpunct_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __c, __locale_t __loc) { return iswxdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __c, __locale_t __loc) { return towupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __c, __locale_t 
__loc) { return towlower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* __ws2, __locale_t __loc) { + return wcscoll_l(__ws1, __ws2, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) { + return wcsxfrm_l(__dest, __src, __n, __loc); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS + +inline _LIBCPP_HIDE_FROM_ABI size_t +__strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, __locale_t __loc) { + return strftime_l(__s, __max, __format, __tm, __loc); +} + +// +// Other functions +// +inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t __loc) { + __locale_guard __current(__loc); + return MB_CUR_MAX; +} + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __c, __locale_t __loc) { + __locale_guard __current(__loc); + return std::btowc(__c); +} + +inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __c, __locale_t __loc) { + __locale_guard __current(__loc); + return std::wctob(__c); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return ::wcsnrtombs(__dest, __src, __nwc, __len, __ps); // non-standard +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __wc, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::wcrtomb(__s, __wc, __ps); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return ::mbsnrtowcs(__dest, __src, __nms, __len, __ps); // non-standard +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbrtowc(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbrtowc(__pwc, __s, __n, __ps); +} + +inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __max, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbtowc(__pwc, __pmb, __max); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __mbrlen(const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbrlen(__s, __n, __ps); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbsrtowcs(__dest, __src, __len, __ps); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +#endif // _LIBCPP_BUILDING_LIBRARY + +#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs +_LIBCPP_HIDE_FROM_ABI +#endif +inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf( + char* __s, size_t __n, __locale_t __loc, const char* __format, ...) { + va_list __va; + va_start(__va, __format); + __locale_guard __current(__loc); + int __res = std::vsnprintf(__s, __n, __format, __va); + va_end(__va); + return __res; +} + +#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs +_LIBCPP_HIDE_FROM_ABI +#endif +inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( + char** __s, __locale_t __loc, const char* __format, ...) 
{ + va_list __va; + va_start(__va, __format); + __locale_guard __current(__loc); + int __res = ::vasprintf(__s, __format, __va); // non-standard + va_end(__va); + return __res; +} + +#ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs +_LIBCPP_HIDE_FROM_ABI +#endif +inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( + const char* __s, __locale_t __loc, const char* __format, ...) { + va_list __va; + va_start(__va, __format); + __locale_guard __current(__loc); + int __res = std::vsscanf(__s, __format, __va); + va_end(__va); + return __res; +} + +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_LINUX_H diff --git a/lib/libcxx/include/__locale_dir/support/netbsd.h b/lib/libcxx/include/__locale_dir/support/netbsd.h index 190857f6f8..b1e67ade55 100644 --- a/lib/libcxx/include/__locale_dir/support/netbsd.h +++ b/lib/libcxx/include/__locale_dir/support/netbsd.h @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +/* zig patch: https://github.com/llvm/llvm-project/pull/143055 */ + #ifndef _LIBCPP___LOCALE_DIR_SUPPORT_NETBSD_H #define _LIBCPP___LOCALE_DIR_SUPPORT_NETBSD_H diff --git a/lib/libcxx/include/__locale_dir/support/no_locale/characters.h b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h index 4fb48ed9ce..1281b8bd13 100644 --- a/lib/libcxx/include/__locale_dir/support/no_locale/characters.h +++ b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h @@ -29,12 +29,6 @@ namespace __locale { // // Character manipulation functions // -#if defined(_LIBCPP_BUILDING_LIBRARY) -inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t) { return std::islower(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t) { return std::isupper(__c); } -#endif - inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t) { return std::isdigit(__c); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t) { return std::isxdigit(__c); } diff --git a/lib/libcxx/include/__locale_dir/support/windows.h b/lib/libcxx/include/__locale_dir/support/windows.h index 56d34c6f0e..0df8709f11 100644 --- a/lib/libcxx/include/__locale_dir/support/windows.h +++ b/lib/libcxx/include/__locale_dir/support/windows.h @@ -29,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __locale { -using __lconv_t = std::lconv; +using __lconv_t _LIBCPP_NODEBUG = std::lconv; class __lconv_storage { public: @@ -197,12 +197,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { // // Character manipulation functions // -#if defined(_LIBCPP_BUILDING_LIBRARY) -inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return _islower_l(__c, __loc); } - -inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return _isupper_l(__c, __loc); } -#endif - inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return _isdigit_l(__c, __loc); } inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return _isxdigit_l(__c, __loc); } @@ -317,7 +311,7 @@ struct __locale_guard { if (std::strcmp(__l.__get_locale(), __lc) != 0) { __locale_all = _strdup(__lc); if (__locale_all == nullptr) - __throw_bad_alloc(); + std::__throw_bad_alloc(); __locale::__setlocale(LC_ALL, __l.__get_locale()); } } diff --git a/lib/libcxx/include/__locale_dir/time.h b/lib/libcxx/include/__locale_dir/time.h new file mode 100644 index 0000000000..5f60d5f36b --- /dev/null +++ 
b/lib/libcxx/include/__locale_dir/time.h @@ -0,0 +1,766 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_TIME_H +#define _LIBCPP___LOCALE_DIR_TIME_H + +#include <__algorithm/copy.h> +#include <__config> +#include <__locale_dir/get_c_locale.h> +#include <__locale_dir/scan_keyword.h> +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +_LIBCPP_HIDE_FROM_ABI int __get_up_to_n_digits( + _InputIterator& __b, _InputIterator __e, ios_base::iostate& __err, const ctype<_CharT>& __ct, int __n) { + // Precondition: __n >= 1 + if (__b == __e) { + __err |= ios_base::eofbit | ios_base::failbit; + return 0; + } + // get first digit + _CharT __c = *__b; + if (!__ct.is(ctype_base::digit, __c)) { + __err |= ios_base::failbit; + return 0; + } + int __r = __ct.narrow(__c, 0) - '0'; + for (++__b, (void)--__n; __b != __e && __n > 0; ++__b, (void)--__n) { + // get next digit + __c = *__b; + if (!__ct.is(ctype_base::digit, __c)) + return __r; + __r = __r * 10 + __ct.narrow(__c, 0) - '0'; + } + if (__b == __e) + __err |= ios_base::eofbit; + return __r; +} + +class _LIBCPP_EXPORTED_FROM_ABI time_base { +public: + enum dateorder { no_order, dmy, mdy, ymd, ydm }; +}; + +template +class __time_get_c_storage { +protected: + typedef basic_string<_CharT> string_type; + + virtual const string_type* __weeks() const; + virtual const string_type* __months() const; + virtual const string_type* __am_pm() const; + virtual const string_type& __c() const; + virtual const string_type& __r() const; + virtual const string_type& __x() const; + virtual const string_type& __X() const; + + _LIBCPP_HIDE_FROM_ABI ~__time_get_c_storage() {} +}; + +template <> +_LIBCPP_EXPORTED_FROM_ABI const string* __time_get_c_storage::__weeks() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const string* __time_get_c_storage::__months() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const string* __time_get_c_storage::__am_pm() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const string& __time_get_c_storage::__c() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const string& __time_get_c_storage::__r() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const string& __time_get_c_storage::__x() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const string& __time_get_c_storage::__X() const; + +# if _LIBCPP_HAS_WIDE_CHARACTERS +template <> +_LIBCPP_EXPORTED_FROM_ABI const wstring* __time_get_c_storage::__weeks() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const wstring* __time_get_c_storage::__months() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const wstring* __time_get_c_storage::__am_pm() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const wstring& __time_get_c_storage::__c() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const wstring& __time_get_c_storage::__r() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const wstring& __time_get_c_storage::__x() const; +template <> +_LIBCPP_EXPORTED_FROM_ABI const wstring& __time_get_c_storage::__X() const; +# endif + +template > +class time_get : public locale::facet, public time_base, private __time_get_c_storage<_CharT> { +public: 
+ typedef _CharT char_type; + typedef _InputIterator iter_type; + typedef time_base::dateorder dateorder; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI explicit time_get(size_t __refs = 0) : locale::facet(__refs) {} + + _LIBCPP_HIDE_FROM_ABI dateorder date_order() const { return this->do_date_order(); } + + _LIBCPP_HIDE_FROM_ABI iter_type + get_time(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + return do_get_time(__b, __e, __iob, __err, __tm); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get_date(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + return do_get_date(__b, __e, __iob, __err, __tm); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get_weekday(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + return do_get_weekday(__b, __e, __iob, __err, __tm); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get_monthname(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + return do_get_monthname(__b, __e, __iob, __err, __tm); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get_year(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + return do_get_year(__b, __e, __iob, __err, __tm); + } + + _LIBCPP_HIDE_FROM_ABI iter_type + get(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm, char __fmt, char __mod = 0) + const { + return do_get(__b, __e, __iob, __err, __tm, __fmt, __mod); + } + + iter_type + get(iter_type __b, + iter_type __e, + ios_base& __iob, + ios_base::iostate& __err, + tm* __tm, + const char_type* __fmtb, + const char_type* __fmte) const; + + static locale::id id; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~time_get() override {} + + virtual dateorder do_date_order() const; + virtual iter_type + do_get_time(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const; + virtual iter_type + do_get_date(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const; + virtual iter_type + do_get_weekday(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const; + virtual iter_type + do_get_monthname(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const; + virtual iter_type + do_get_year(iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const; + virtual iter_type do_get( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm, char __fmt, char __mod) const; + +private: + void __get_white_space(iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void __get_percent(iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + + void __get_weekdayname( + int& __m, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void __get_monthname( + int& __m, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void __get_day(int& __d, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_month(int& __m, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_year(int& __y, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_year4(int& __y, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + 
__get_hour(int& __d, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_12_hour(int& __h, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_am_pm(int& __h, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_minute(int& __m, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_second(int& __s, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void + __get_weekday(int& __w, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; + void __get_day_year_num( + int& __w, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const; +}; + +template +locale::id time_get<_CharT, _InputIterator>::id; + +// time_get primitives + +template +void time_get<_CharT, _InputIterator>::__get_weekdayname( + int& __w, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + // Note: ignoring case comes from the POSIX strptime spec + const string_type* __wk = this->__weeks(); + ptrdiff_t __i = std::__scan_keyword(__b, __e, __wk, __wk + 14, __ct, __err, false) - __wk; + if (__i < 14) + __w = __i % 7; +} + +template +void time_get<_CharT, _InputIterator>::__get_monthname( + int& __m, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + // Note: ignoring case comes from the POSIX strptime spec + const string_type* __month = this->__months(); + ptrdiff_t __i = std::__scan_keyword(__b, __e, __month, __month + 24, __ct, __err, false) - __month; + if (__i < 24) + __m = __i % 12; +} + +template +void time_get<_CharT, _InputIterator>::__get_day( + int& __d, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 2); + if (!(__err & ios_base::failbit) && 1 <= __t && __t <= 31) + __d = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_month( + int& __m, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 2) - 1; + if (!(__err & ios_base::failbit) && 0 <= __t && __t <= 11) + __m = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_year( + int& __y, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 4); + if (!(__err & ios_base::failbit)) { + if (__t < 69) + __t += 2000; + else if (69 <= __t && __t <= 99) + __t += 1900; + __y = __t - 1900; + } +} + +template +void time_get<_CharT, _InputIterator>::__get_year4( + int& __y, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 4); + if (!(__err & ios_base::failbit)) + __y = __t - 1900; +} + +template +void time_get<_CharT, _InputIterator>::__get_hour( + int& __h, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 2); + if (!(__err & ios_base::failbit) && __t <= 23) + __h = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_12_hour( + int& __h, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = 
std::__get_up_to_n_digits(__b, __e, __err, __ct, 2); + if (!(__err & ios_base::failbit) && 1 <= __t && __t <= 12) + __h = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_minute( + int& __m, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 2); + if (!(__err & ios_base::failbit) && __t <= 59) + __m = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_second( + int& __s, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 2); + if (!(__err & ios_base::failbit) && __t <= 60) + __s = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_weekday( + int& __w, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 1); + if (!(__err & ios_base::failbit) && __t <= 6) + __w = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_day_year_num( + int& __d, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + int __t = std::__get_up_to_n_digits(__b, __e, __err, __ct, 3); + if (!(__err & ios_base::failbit) && __t <= 365) + __d = __t; + else + __err |= ios_base::failbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_white_space( + iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + for (; __b != __e && __ct.is(ctype_base::space, *__b); ++__b) + ; + if (__b == __e) + __err |= ios_base::eofbit; +} + +template +void time_get<_CharT, _InputIterator>::__get_am_pm( + int& __h, iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + const string_type* __ap = this->__am_pm(); + if (__ap[0].size() + __ap[1].size() == 0) { + __err |= ios_base::failbit; + return; + } + ptrdiff_t __i = std::__scan_keyword(__b, __e, __ap, __ap + 2, __ct, __err, false) - __ap; + if (__i == 0 && __h == 12) + __h = 0; + else if (__i == 1 && __h < 12) + __h += 12; +} + +template +void time_get<_CharT, _InputIterator>::__get_percent( + iter_type& __b, iter_type __e, ios_base::iostate& __err, const ctype& __ct) const { + if (__b == __e) { + __err |= ios_base::eofbit | ios_base::failbit; + return; + } + if (__ct.narrow(*__b, 0) != '%') + __err |= ios_base::failbit; + else if (++__b == __e) + __err |= ios_base::eofbit; +} + +// time_get end primitives + +template +_InputIterator time_get<_CharT, _InputIterator>::get( + iter_type __b, + iter_type __e, + ios_base& __iob, + ios_base::iostate& __err, + tm* __tm, + const char_type* __fmtb, + const char_type* __fmte) const { + const ctype& __ct = std::use_facet >(__iob.getloc()); + __err = ios_base::goodbit; + while (__fmtb != __fmte && __err == ios_base::goodbit) { + if (__b == __e) { + __err = ios_base::failbit; + break; + } + if (__ct.narrow(*__fmtb, 0) == '%') { + if (++__fmtb == __fmte) { + __err = ios_base::failbit; + break; + } + char __cmd = __ct.narrow(*__fmtb, 0); + char __opt = '\0'; + if (__cmd == 'E' || __cmd == '0') { + if (++__fmtb == __fmte) { + __err = ios_base::failbit; + break; + } + __opt = __cmd; + __cmd = __ct.narrow(*__fmtb, 0); + } + __b = do_get(__b, __e, __iob, __err, __tm, __cmd, __opt); + ++__fmtb; + } else if (__ct.is(ctype_base::space, *__fmtb)) { + for (++__fmtb; __fmtb != __fmte && 
__ct.is(ctype_base::space, *__fmtb); ++__fmtb) + ; + for (; __b != __e && __ct.is(ctype_base::space, *__b); ++__b) + ; + } else if (__ct.toupper(*__b) == __ct.toupper(*__fmtb)) { + ++__b; + ++__fmtb; + } else + __err = ios_base::failbit; + } + if (__b == __e) + __err |= ios_base::eofbit; + return __b; +} + +template +typename time_get<_CharT, _InputIterator>::dateorder time_get<_CharT, _InputIterator>::do_date_order() const { + return mdy; +} + +template +_InputIterator time_get<_CharT, _InputIterator>::do_get_time( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + const char_type __fmt[] = {'%', 'H', ':', '%', 'M', ':', '%', 'S'}; + return get(__b, __e, __iob, __err, __tm, __fmt, __fmt + sizeof(__fmt) / sizeof(__fmt[0])); +} + +template +_InputIterator time_get<_CharT, _InputIterator>::do_get_date( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + const string_type& __fmt = this->__x(); + return get(__b, __e, __iob, __err, __tm, __fmt.data(), __fmt.data() + __fmt.size()); +} + +template +_InputIterator time_get<_CharT, _InputIterator>::do_get_weekday( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + const ctype& __ct = std::use_facet >(__iob.getloc()); + __get_weekdayname(__tm->tm_wday, __b, __e, __err, __ct); + return __b; +} + +template +_InputIterator time_get<_CharT, _InputIterator>::do_get_monthname( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + const ctype& __ct = std::use_facet >(__iob.getloc()); + __get_monthname(__tm->tm_mon, __b, __e, __err, __ct); + return __b; +} + +template +_InputIterator time_get<_CharT, _InputIterator>::do_get_year( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm) const { + const ctype& __ct = std::use_facet >(__iob.getloc()); + __get_year(__tm->tm_year, __b, __e, __err, __ct); + return __b; +} + +template +_InputIterator time_get<_CharT, _InputIterator>::do_get( + iter_type __b, iter_type __e, ios_base& __iob, ios_base::iostate& __err, tm* __tm, char __fmt, char) const { + __err = ios_base::goodbit; + const ctype& __ct = std::use_facet >(__iob.getloc()); + switch (__fmt) { + case 'a': + case 'A': + __get_weekdayname(__tm->tm_wday, __b, __e, __err, __ct); + break; + case 'b': + case 'B': + case 'h': + __get_monthname(__tm->tm_mon, __b, __e, __err, __ct); + break; + case 'c': { + const string_type& __fm = this->__c(); + __b = get(__b, __e, __iob, __err, __tm, __fm.data(), __fm.data() + __fm.size()); + } break; + case 'd': + case 'e': + __get_day(__tm->tm_mday, __b, __e, __err, __ct); + break; + case 'D': { + const char_type __fm[] = {'%', 'm', '/', '%', 'd', '/', '%', 'y'}; + __b = get(__b, __e, __iob, __err, __tm, __fm, __fm + sizeof(__fm) / sizeof(__fm[0])); + } break; + case 'F': { + const char_type __fm[] = {'%', 'Y', '-', '%', 'm', '-', '%', 'd'}; + __b = get(__b, __e, __iob, __err, __tm, __fm, __fm + sizeof(__fm) / sizeof(__fm[0])); + } break; + case 'H': + __get_hour(__tm->tm_hour, __b, __e, __err, __ct); + break; + case 'I': + __get_12_hour(__tm->tm_hour, __b, __e, __err, __ct); + break; + case 'j': + __get_day_year_num(__tm->tm_yday, __b, __e, __err, __ct); + break; + case 'm': + __get_month(__tm->tm_mon, __b, __e, __err, __ct); + break; + case 'M': + __get_minute(__tm->tm_min, __b, __e, __err, __ct); + break; + case 'n': + case 't': + __get_white_space(__b, __e, __err, __ct); + break; + case 'p': + __get_am_pm(__tm->tm_hour, __b, 
__e, __err, __ct); + break; + case 'r': { + const char_type __fm[] = {'%', 'I', ':', '%', 'M', ':', '%', 'S', ' ', '%', 'p'}; + __b = get(__b, __e, __iob, __err, __tm, __fm, __fm + sizeof(__fm) / sizeof(__fm[0])); + } break; + case 'R': { + const char_type __fm[] = {'%', 'H', ':', '%', 'M'}; + __b = get(__b, __e, __iob, __err, __tm, __fm, __fm + sizeof(__fm) / sizeof(__fm[0])); + } break; + case 'S': + __get_second(__tm->tm_sec, __b, __e, __err, __ct); + break; + case 'T': { + const char_type __fm[] = {'%', 'H', ':', '%', 'M', ':', '%', 'S'}; + __b = get(__b, __e, __iob, __err, __tm, __fm, __fm + sizeof(__fm) / sizeof(__fm[0])); + } break; + case 'w': + __get_weekday(__tm->tm_wday, __b, __e, __err, __ct); + break; + case 'x': + return do_get_date(__b, __e, __iob, __err, __tm); + case 'X': { + const string_type& __fm = this->__X(); + __b = get(__b, __e, __iob, __err, __tm, __fm.data(), __fm.data() + __fm.size()); + } break; + case 'y': + __get_year(__tm->tm_year, __b, __e, __err, __ct); + break; + case 'Y': + __get_year4(__tm->tm_year, __b, __e, __err, __ct); + break; + case '%': + __get_percent(__b, __e, __err, __ct); + break; + default: + __err |= ios_base::failbit; + } + return __b; +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get; +# endif + +class _LIBCPP_EXPORTED_FROM_ABI __time_get { +protected: + __locale::__locale_t __loc_; + + __time_get(const char* __nm); + __time_get(const string& __nm); + ~__time_get(); +}; + +template +class __time_get_storage : public __time_get { +protected: + typedef basic_string<_CharT> string_type; + + string_type __weeks_[14]; + string_type __months_[24]; + string_type __am_pm_[2]; + string_type __c_; + string_type __r_; + string_type __x_; + string_type __X_; + + explicit __time_get_storage(const char* __nm); + explicit __time_get_storage(const string& __nm); + + _LIBCPP_HIDE_FROM_ABI ~__time_get_storage() {} + + time_base::dateorder __do_date_order() const; + +private: + void init(const ctype<_CharT>&); + string_type __analyze(char __fmt, const ctype<_CharT>&); +}; + +# define _LIBCPP_TIME_GET_STORAGE_EXPLICIT_INSTANTIATION(_CharT) \ + template <> \ + _LIBCPP_EXPORTED_FROM_ABI time_base::dateorder __time_get_storage<_CharT>::__do_date_order() const; \ + template <> \ + _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const char*); \ + template <> \ + _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&); \ + template <> \ + _LIBCPP_EXPORTED_FROM_ABI void __time_get_storage<_CharT>::init(const ctype<_CharT>&); \ + template <> \ + _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::string_type __time_get_storage<_CharT>::__analyze( \ + char, const ctype<_CharT>&); \ + extern template _LIBCPP_EXPORTED_FROM_ABI time_base::dateorder __time_get_storage<_CharT>::__do_date_order() \ + const; \ + extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const char*); \ + extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::__time_get_storage(const string&); \ + extern template _LIBCPP_EXPORTED_FROM_ABI void __time_get_storage<_CharT>::init(const ctype<_CharT>&); \ + extern template _LIBCPP_EXPORTED_FROM_ABI __time_get_storage<_CharT>::string_type \ + __time_get_storage<_CharT>::__analyze(char, const ctype<_CharT>&); + +_LIBCPP_TIME_GET_STORAGE_EXPLICIT_INSTANTIATION(char) +# if _LIBCPP_HAS_WIDE_CHARACTERS 
+_LIBCPP_TIME_GET_STORAGE_EXPLICIT_INSTANTIATION(wchar_t) +# endif +# undef _LIBCPP_TIME_GET_STORAGE_EXPLICIT_INSTANTIATION + +template > +class time_get_byname : public time_get<_CharT, _InputIterator>, private __time_get_storage<_CharT> { +public: + typedef time_base::dateorder dateorder; + typedef _InputIterator iter_type; + typedef _CharT char_type; + typedef basic_string string_type; + + _LIBCPP_HIDE_FROM_ABI explicit time_get_byname(const char* __nm, size_t __refs = 0) + : time_get<_CharT, _InputIterator>(__refs), __time_get_storage<_CharT>(__nm) {} + _LIBCPP_HIDE_FROM_ABI explicit time_get_byname(const string& __nm, size_t __refs = 0) + : time_get<_CharT, _InputIterator>(__refs), __time_get_storage<_CharT>(__nm) {} + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~time_get_byname() override {} + + _LIBCPP_HIDE_FROM_ABI_VIRTUAL dateorder do_date_order() const override { return this->__do_date_order(); } + +private: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const string_type* __weeks() const override { return this->__weeks_; } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const string_type* __months() const override { return this->__months_; } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const string_type* __am_pm() const override { return this->__am_pm_; } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const string_type& __c() const override { return this->__c_; } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const string_type& __r() const override { return this->__r_; } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const string_type& __x() const override { return this->__x_; } + _LIBCPP_HIDE_FROM_ABI_VIRTUAL const string_type& __X() const override { return this->__X_; } +}; + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname; +# endif + +class _LIBCPP_EXPORTED_FROM_ABI __time_put { + __locale::__locale_t __loc_; + +protected: + _LIBCPP_HIDE_FROM_ABI __time_put() : __loc_(_LIBCPP_GET_C_LOCALE) {} + __time_put(const char* __nm); + __time_put(const string& __nm); + ~__time_put(); + void __do_put(char* __nb, char*& __ne, const tm* __tm, char __fmt, char __mod) const; +# if _LIBCPP_HAS_WIDE_CHARACTERS + void __do_put(wchar_t* __wb, wchar_t*& __we, const tm* __tm, char __fmt, char __mod) const; +# endif +}; + +template > +class time_put : public locale::facet, private __time_put { +public: + typedef _CharT char_type; + typedef _OutputIterator iter_type; + + _LIBCPP_HIDE_FROM_ABI explicit time_put(size_t __refs = 0) : locale::facet(__refs) {} + + iter_type + put(iter_type __s, ios_base& __iob, char_type __fl, const tm* __tm, const char_type* __pb, const char_type* __pe) + const; + + _LIBCPP_HIDE_FROM_ABI iter_type + put(iter_type __s, ios_base& __iob, char_type __fl, const tm* __tm, char __fmt, char __mod = 0) const { + return do_put(__s, __iob, __fl, __tm, __fmt, __mod); + } + + static locale::id id; + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~time_put() override {} + virtual iter_type do_put(iter_type __s, ios_base&, char_type, const tm* __tm, char __fmt, char __mod) const; + + _LIBCPP_HIDE_FROM_ABI explicit time_put(const char* __nm, size_t __refs) : locale::facet(__refs), __time_put(__nm) {} + _LIBCPP_HIDE_FROM_ABI explicit time_put(const string& __nm, size_t __refs) + : locale::facet(__refs), __time_put(__nm) {} +}; + +template +locale::id time_put<_CharT, _OutputIterator>::id; + +template +_OutputIterator time_put<_CharT, _OutputIterator>::put( + iter_type __s, ios_base& __iob, char_type __fl, const tm* __tm, const char_type* __pb, 
const char_type* __pe) + const { + const ctype& __ct = std::use_facet >(__iob.getloc()); + for (; __pb != __pe; ++__pb) { + if (__ct.narrow(*__pb, 0) == '%') { + if (++__pb == __pe) { + *__s++ = __pb[-1]; + break; + } + char __mod = 0; + char __fmt = __ct.narrow(*__pb, 0); + if (__fmt == 'E' || __fmt == 'O') { + if (++__pb == __pe) { + *__s++ = __pb[-2]; + *__s++ = __pb[-1]; + break; + } + __mod = __fmt; + __fmt = __ct.narrow(*__pb, 0); + } + __s = do_put(__s, __iob, __fl, __tm, __fmt, __mod); + } else + *__s++ = *__pb; + } + return __s; +} + +template +_OutputIterator time_put<_CharT, _OutputIterator>::do_put( + iter_type __s, ios_base&, char_type, const tm* __tm, char __fmt, char __mod) const { + char_type __nar[100]; + char_type* __nb = __nar; + char_type* __ne = __nb + 100; + __do_put(__nb, __ne, __tm, __fmt, __mod); + return std::copy(__nb, __ne, __s); +} + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put; +# endif + +template > +class time_put_byname : public time_put<_CharT, _OutputIterator> { +public: + _LIBCPP_HIDE_FROM_ABI explicit time_put_byname(const char* __nm, size_t __refs = 0) + : time_put<_CharT, _OutputIterator>(__nm, __refs) {} + + _LIBCPP_HIDE_FROM_ABI explicit time_put_byname(const string& __nm, size_t __refs = 0) + : time_put<_CharT, _OutputIterator>(__nm, __refs) {} + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~time_put_byname() override {} +}; + +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname; +# if _LIBCPP_HAS_WIDE_CHARACTERS +extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname; +# endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_TIME_H diff --git a/lib/libcxx/include/__locale_dir/wbuffer_convert.h b/lib/libcxx/include/__locale_dir/wbuffer_convert.h new file mode 100644 index 0000000000..a6818aadf5 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/wbuffer_convert.h @@ -0,0 +1,430 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_WBUFFER_CONVERT_H +#define _LIBCPP___LOCALE_DIR_WBUFFER_CONVERT_H + +#include <__algorithm/reverse.h> +#include <__config> +#include <__string/char_traits.h> +#include +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +# if _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT) + +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template > +class _LIBCPP_DEPRECATED_IN_CXX17 wbuffer_convert : public basic_streambuf<_Elem, _Tr> { +public: + // types: + typedef _Elem char_type; + typedef _Tr traits_type; + typedef typename traits_type::int_type int_type; + typedef typename traits_type::pos_type pos_type; + typedef typename traits_type::off_type off_type; + typedef typename _Codecvt::state_type state_type; + +private: + char* __extbuf_; + const char* __extbufnext_; + const char* __extbufend_; + char __extbuf_min_[8]; + size_t __ebs_; + char_type* __intbuf_; + size_t __ibs_; + streambuf* __bufptr_; + _Codecvt* __cv_; + state_type __st_; + ios_base::openmode __cm_; + bool __owns_eb_; + bool __owns_ib_; + bool __always_noconv_; + +public: +# ifndef _LIBCPP_CXX03_LANG + _LIBCPP_HIDE_FROM_ABI wbuffer_convert() : wbuffer_convert(nullptr) {} + explicit _LIBCPP_HIDE_FROM_ABI + wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt, state_type __state = state_type()); +# else + _LIBCPP_EXPLICIT_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI + wbuffer_convert(streambuf* __bytebuf = nullptr, _Codecvt* __pcvt = new _Codecvt, state_type __state = state_type()); +# endif + + _LIBCPP_HIDE_FROM_ABI ~wbuffer_convert(); + + _LIBCPP_HIDE_FROM_ABI streambuf* rdbuf() const { return __bufptr_; } + _LIBCPP_HIDE_FROM_ABI streambuf* rdbuf(streambuf* __bytebuf) { + streambuf* __r = __bufptr_; + __bufptr_ = __bytebuf; + return __r; + } + + wbuffer_convert(const wbuffer_convert&) = delete; + wbuffer_convert& operator=(const wbuffer_convert&) = delete; + + _LIBCPP_HIDE_FROM_ABI state_type state() const { return __st_; } + +protected: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual int_type underflow(); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual int_type pbackfail(int_type __c = traits_type::eof()); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual int_type overflow(int_type __c = traits_type::eof()); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual basic_streambuf* setbuf(char_type* __s, streamsize __n); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual pos_type + seekoff(off_type __off, ios_base::seekdir __way, ios_base::openmode __wch = ios_base::in | ios_base::out); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual pos_type + seekpos(pos_type __sp, ios_base::openmode __wch = ios_base::in | ios_base::out); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual int sync(); + +private: + _LIBCPP_HIDE_FROM_ABI_VIRTUAL bool __read_mode(); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL void __write_mode(); + _LIBCPP_HIDE_FROM_ABI_VIRTUAL wbuffer_convert* __close(); +}; + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +wbuffer_convert<_Codecvt, _Elem, _Tr>::wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt, state_type __state) + : __extbuf_(nullptr), + __extbufnext_(nullptr), + __extbufend_(nullptr), + __ebs_(0), + __intbuf_(0), + __ibs_(0), + __bufptr_(__bytebuf), + __cv_(__pcvt), + __st_(__state), + __cm_(0), + __owns_eb_(false), + __owns_ib_(false), + __always_noconv_(__cv_ ? 
__cv_->always_noconv() : false) { + setbuf(0, 4096); +} + +template +wbuffer_convert<_Codecvt, _Elem, _Tr>::~wbuffer_convert() { + __close(); + delete __cv_; + if (__owns_eb_) + delete[] __extbuf_; + if (__owns_ib_) + delete[] __intbuf_; +} + +template +typename wbuffer_convert<_Codecvt, _Elem, _Tr>::int_type wbuffer_convert<_Codecvt, _Elem, _Tr>::underflow() { + _LIBCPP_SUPPRESS_DEPRECATED_POP + if (__cv_ == 0 || __bufptr_ == nullptr) + return traits_type::eof(); + bool __initial = __read_mode(); + char_type __1buf; + if (this->gptr() == 0) + this->setg(std::addressof(__1buf), std::addressof(__1buf) + 1, std::addressof(__1buf) + 1); + const size_t __unget_sz = __initial ? 0 : std::min((this->egptr() - this->eback()) / 2, 4); + int_type __c = traits_type::eof(); + if (this->gptr() == this->egptr()) { + std::memmove(this->eback(), this->egptr() - __unget_sz, __unget_sz * sizeof(char_type)); + if (__always_noconv_) { + streamsize __nmemb = static_cast(this->egptr() - this->eback() - __unget_sz); + __nmemb = __bufptr_->sgetn((char*)this->eback() + __unget_sz, __nmemb); + if (__nmemb != 0) { + this->setg(this->eback(), this->eback() + __unget_sz, this->eback() + __unget_sz + __nmemb); + __c = *this->gptr(); + } + } else { + if (__extbufend_ != __extbufnext_) { + _LIBCPP_ASSERT_NON_NULL(__extbufnext_ != nullptr, "underflow moving from nullptr"); + _LIBCPP_ASSERT_NON_NULL(__extbuf_ != nullptr, "underflow moving into nullptr"); + std::memmove(__extbuf_, __extbufnext_, __extbufend_ - __extbufnext_); + } + __extbufnext_ = __extbuf_ + (__extbufend_ - __extbufnext_); + __extbufend_ = __extbuf_ + (__extbuf_ == __extbuf_min_ ? sizeof(__extbuf_min_) : __ebs_); + streamsize __nmemb = std::min(static_cast(this->egptr() - this->eback() - __unget_sz), + static_cast(__extbufend_ - __extbufnext_)); + codecvt_base::result __r; + // FIXME: Do we ever need to restore the state here? 
+ // state_type __svs = __st_; + streamsize __nr = __bufptr_->sgetn(const_cast(__extbufnext_), __nmemb); + if (__nr != 0) { + __extbufend_ = __extbufnext_ + __nr; + char_type* __inext; + __r = __cv_->in( + __st_, __extbuf_, __extbufend_, __extbufnext_, this->eback() + __unget_sz, this->egptr(), __inext); + if (__r == codecvt_base::noconv) { + this->setg((char_type*)__extbuf_, (char_type*)__extbuf_, (char_type*)const_cast(__extbufend_)); + __c = *this->gptr(); + } else if (__inext != this->eback() + __unget_sz) { + this->setg(this->eback(), this->eback() + __unget_sz, __inext); + __c = *this->gptr(); + } + } + } + } else + __c = *this->gptr(); + if (this->eback() == std::addressof(__1buf)) + this->setg(0, 0, 0); + return __c; +} + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +typename wbuffer_convert<_Codecvt, _Elem, _Tr>::int_type +wbuffer_convert<_Codecvt, _Elem, _Tr>::pbackfail(int_type __c) { + _LIBCPP_SUPPRESS_DEPRECATED_POP + if (__cv_ != 0 && __bufptr_ && this->eback() < this->gptr()) { + if (traits_type::eq_int_type(__c, traits_type::eof())) { + this->gbump(-1); + return traits_type::not_eof(__c); + } + if (traits_type::eq(traits_type::to_char_type(__c), this->gptr()[-1])) { + this->gbump(-1); + *this->gptr() = traits_type::to_char_type(__c); + return __c; + } + } + return traits_type::eof(); +} + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +typename wbuffer_convert<_Codecvt, _Elem, _Tr>::int_type wbuffer_convert<_Codecvt, _Elem, _Tr>::overflow(int_type __c) { + _LIBCPP_SUPPRESS_DEPRECATED_POP + if (__cv_ == 0 || !__bufptr_) + return traits_type::eof(); + __write_mode(); + char_type __1buf; + char_type* __pb_save = this->pbase(); + char_type* __epb_save = this->epptr(); + if (!traits_type::eq_int_type(__c, traits_type::eof())) { + if (this->pptr() == 0) + this->setp(std::addressof(__1buf), std::addressof(__1buf) + 1); + *this->pptr() = traits_type::to_char_type(__c); + this->pbump(1); + } + if (this->pptr() != this->pbase()) { + if (__always_noconv_) { + streamsize __nmemb = static_cast(this->pptr() - this->pbase()); + if (__bufptr_->sputn((const char*)this->pbase(), __nmemb) != __nmemb) + return traits_type::eof(); + } else { + char* __extbe = __extbuf_; + codecvt_base::result __r; + do { + const char_type* __e; + __r = __cv_->out(__st_, this->pbase(), this->pptr(), __e, __extbuf_, __extbuf_ + __ebs_, __extbe); + if (__e == this->pbase()) + return traits_type::eof(); + if (__r == codecvt_base::noconv) { + streamsize __nmemb = static_cast(this->pptr() - this->pbase()); + if (__bufptr_->sputn((const char*)this->pbase(), __nmemb) != __nmemb) + return traits_type::eof(); + } else if (__r == codecvt_base::ok || __r == codecvt_base::partial) { + streamsize __nmemb = static_cast(__extbe - __extbuf_); + if (__bufptr_->sputn(__extbuf_, __nmemb) != __nmemb) + return traits_type::eof(); + if (__r == codecvt_base::partial) { + this->setp(const_cast(__e), this->pptr()); + this->__pbump(this->epptr() - this->pbase()); + } + } else + return traits_type::eof(); + } while (__r == codecvt_base::partial); + } + this->setp(__pb_save, __epb_save); + } + return traits_type::not_eof(__c); +} + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +basic_streambuf<_Elem, _Tr>* wbuffer_convert<_Codecvt, _Elem, _Tr>::setbuf(char_type* __s, streamsize __n) { + _LIBCPP_SUPPRESS_DEPRECATED_POP + this->setg(0, 0, 0); + this->setp(0, 0); + if (__owns_eb_) + delete[] __extbuf_; + if (__owns_ib_) + delete[] __intbuf_; + __ebs_ = __n; + if (__ebs_ > sizeof(__extbuf_min_)) { + if (__always_noconv_ && __s) { + __extbuf_ = 
(char*)__s; + __owns_eb_ = false; + } else { + __extbuf_ = new char[__ebs_]; + __owns_eb_ = true; + } + } else { + __extbuf_ = __extbuf_min_; + __ebs_ = sizeof(__extbuf_min_); + __owns_eb_ = false; + } + if (!__always_noconv_) { + __ibs_ = max(__n, sizeof(__extbuf_min_)); + if (__s && __ibs_ >= sizeof(__extbuf_min_)) { + __intbuf_ = __s; + __owns_ib_ = false; + } else { + __intbuf_ = new char_type[__ibs_]; + __owns_ib_ = true; + } + } else { + __ibs_ = 0; + __intbuf_ = 0; + __owns_ib_ = false; + } + return this; +} + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +typename wbuffer_convert<_Codecvt, _Elem, _Tr>::pos_type +wbuffer_convert<_Codecvt, _Elem, _Tr>::seekoff(off_type __off, ios_base::seekdir __way, ios_base::openmode __om) { + int __width = __cv_->encoding(); + if (__cv_ == 0 || !__bufptr_ || (__width <= 0 && __off != 0) || sync()) + return pos_type(off_type(-1)); + // __width > 0 || __off == 0, now check __way + if (__way != ios_base::beg && __way != ios_base::cur && __way != ios_base::end) + return pos_type(off_type(-1)); + pos_type __r = __bufptr_->pubseekoff(__width * __off, __way, __om); + __r.state(__st_); + return __r; +} + +template +typename wbuffer_convert<_Codecvt, _Elem, _Tr>::pos_type +wbuffer_convert<_Codecvt, _Elem, _Tr>::seekpos(pos_type __sp, ios_base::openmode __wch) { + if (__cv_ == 0 || !__bufptr_ || sync()) + return pos_type(off_type(-1)); + if (__bufptr_->pubseekpos(__sp, __wch) == pos_type(off_type(-1))) + return pos_type(off_type(-1)); + return __sp; +} + +template +int wbuffer_convert<_Codecvt, _Elem, _Tr>::sync() { + _LIBCPP_SUPPRESS_DEPRECATED_POP + if (__cv_ == 0 || !__bufptr_) + return 0; + if (__cm_ & ios_base::out) { + if (this->pptr() != this->pbase()) + if (overflow() == traits_type::eof()) + return -1; + codecvt_base::result __r; + do { + char* __extbe; + __r = __cv_->unshift(__st_, __extbuf_, __extbuf_ + __ebs_, __extbe); + streamsize __nmemb = static_cast(__extbe - __extbuf_); + if (__bufptr_->sputn(__extbuf_, __nmemb) != __nmemb) + return -1; + } while (__r == codecvt_base::partial); + if (__r == codecvt_base::error) + return -1; + if (__bufptr_->pubsync()) + return -1; + } else if (__cm_ & ios_base::in) { + off_type __c; + if (__always_noconv_) + __c = this->egptr() - this->gptr(); + else { + int __width = __cv_->encoding(); + __c = __extbufend_ - __extbufnext_; + if (__width > 0) + __c += __width * (this->egptr() - this->gptr()); + else { + if (this->gptr() != this->egptr()) { + std::reverse(this->gptr(), this->egptr()); + codecvt_base::result __r; + const char_type* __e = this->gptr(); + char* __extbe; + do { + __r = __cv_->out(__st_, __e, this->egptr(), __e, __extbuf_, __extbuf_ + __ebs_, __extbe); + switch (__r) { + case codecvt_base::noconv: + __c += this->egptr() - this->gptr(); + break; + case codecvt_base::ok: + case codecvt_base::partial: + __c += __extbe - __extbuf_; + break; + default: + return -1; + } + } while (__r == codecvt_base::partial); + } + } + } + if (__bufptr_->pubseekoff(-__c, ios_base::cur, __cm_) == pos_type(off_type(-1))) + return -1; + this->setg(0, 0, 0); + __cm_ = 0; + } + return 0; +} + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +bool wbuffer_convert<_Codecvt, _Elem, _Tr>::__read_mode() { + if (!(__cm_ & ios_base::in)) { + this->setp(0, 0); + if (__always_noconv_) + this->setg((char_type*)__extbuf_, (char_type*)__extbuf_ + __ebs_, (char_type*)__extbuf_ + __ebs_); + else + this->setg(__intbuf_, __intbuf_ + __ibs_, __intbuf_ + __ibs_); + __cm_ = ios_base::in; + return true; + } + return false; +} + +template +void 
wbuffer_convert<_Codecvt, _Elem, _Tr>::__write_mode() { + if (!(__cm_ & ios_base::out)) { + this->setg(0, 0, 0); + if (__ebs_ > sizeof(__extbuf_min_)) { + if (__always_noconv_) + this->setp((char_type*)__extbuf_, (char_type*)__extbuf_ + (__ebs_ - 1)); + else + this->setp(__intbuf_, __intbuf_ + (__ibs_ - 1)); + } else + this->setp(0, 0); + __cm_ = ios_base::out; + } +} + +template +wbuffer_convert<_Codecvt, _Elem, _Tr>* wbuffer_convert<_Codecvt, _Elem, _Tr>::__close() { + wbuffer_convert* __rt = nullptr; + if (__cv_ != nullptr && __bufptr_ != nullptr) { + __rt = this; + if ((__cm_ & ios_base::out) && sync()) + __rt = nullptr; + } + return __rt; +} + +_LIBCPP_SUPPRESS_DEPRECATED_POP + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +# endif // _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT) + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_WBUFFER_CONVERT_H diff --git a/lib/libcxx/include/__locale_dir/wstring_convert.h b/lib/libcxx/include/__locale_dir/wstring_convert.h new file mode 100644 index 0000000000..42a56eb857 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/wstring_convert.h @@ -0,0 +1,254 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_WSTRING_CONVERT_H +#define _LIBCPP___LOCALE_DIR_WSTRING_CONVERT_H + +#include <__config> +#include <__locale> +#include <__memory/allocator.h> +#include + +#if _LIBCPP_HAS_LOCALIZATION + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +# if _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT) + +_LIBCPP_PUSH_MACROS +# include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , + class _ByteAlloc = allocator > +class _LIBCPP_DEPRECATED_IN_CXX17 wstring_convert { +public: + typedef basic_string, _ByteAlloc> byte_string; + typedef basic_string<_Elem, char_traits<_Elem>, _WideAlloc> wide_string; + typedef typename _Codecvt::state_type state_type; + typedef typename wide_string::traits_type::int_type int_type; + +private: + byte_string __byte_err_string_; + wide_string __wide_err_string_; + _Codecvt* __cvtptr_; + state_type __cvtstate_; + size_t __cvtcount_; + +public: +# ifndef _LIBCPP_CXX03_LANG + _LIBCPP_HIDE_FROM_ABI wstring_convert() : wstring_convert(new _Codecvt) {} + _LIBCPP_HIDE_FROM_ABI explicit wstring_convert(_Codecvt* __pcvt); +# else + _LIBCPP_HIDE_FROM_ABI _LIBCPP_EXPLICIT_SINCE_CXX14 wstring_convert(_Codecvt* __pcvt = new _Codecvt); +# endif + + _LIBCPP_HIDE_FROM_ABI wstring_convert(_Codecvt* __pcvt, state_type __state); + _LIBCPP_EXPLICIT_SINCE_CXX14 _LIBCPP_HIDE_FROM_ABI + wstring_convert(const byte_string& __byte_err, const wide_string& __wide_err = wide_string()); +# ifndef _LIBCPP_CXX03_LANG + _LIBCPP_HIDE_FROM_ABI wstring_convert(wstring_convert&& __wc); +# endif + _LIBCPP_HIDE_FROM_ABI ~wstring_convert(); + + wstring_convert(const wstring_convert& __wc) = delete; + wstring_convert& operator=(const wstring_convert& __wc) = delete; + + _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(char __byte) { return from_bytes(&__byte, &__byte + 1); } + _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const char* __ptr) { + return from_bytes(__ptr, __ptr + 
char_traits::length(__ptr)); + } + _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const byte_string& __str) { + return from_bytes(__str.data(), __str.data() + __str.size()); + } + _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const char* __first, const char* __last); + + _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(_Elem __wchar) { + return to_bytes(std::addressof(__wchar), std::addressof(__wchar) + 1); + } + _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const _Elem* __wptr) { + return to_bytes(__wptr, __wptr + char_traits<_Elem>::length(__wptr)); + } + _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const wide_string& __wstr) { + return to_bytes(__wstr.data(), __wstr.data() + __wstr.size()); + } + _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const _Elem* __first, const _Elem* __last); + + _LIBCPP_HIDE_FROM_ABI size_t converted() const _NOEXCEPT { return __cvtcount_; } + _LIBCPP_HIDE_FROM_ABI state_type state() const { return __cvtstate_; } +}; + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +inline wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::wstring_convert(_Codecvt* __pcvt) + : __cvtptr_(__pcvt), __cvtstate_(), __cvtcount_(0) {} +_LIBCPP_SUPPRESS_DEPRECATED_POP + +template +inline wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::wstring_convert(_Codecvt* __pcvt, state_type __state) + : __cvtptr_(__pcvt), __cvtstate_(__state), __cvtcount_(0) {} + +template +wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::wstring_convert( + const byte_string& __byte_err, const wide_string& __wide_err) + : __byte_err_string_(__byte_err), __wide_err_string_(__wide_err), __cvtstate_(), __cvtcount_(0) { + __cvtptr_ = new _Codecvt; +} + +# ifndef _LIBCPP_CXX03_LANG + +template +inline wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::wstring_convert(wstring_convert&& __wc) + : __byte_err_string_(std::move(__wc.__byte_err_string_)), + __wide_err_string_(std::move(__wc.__wide_err_string_)), + __cvtptr_(__wc.__cvtptr_), + __cvtstate_(__wc.__cvtstate_), + __cvtcount_(__wc.__cvtcount_) { + __wc.__cvtptr_ = nullptr; +} + +# endif // _LIBCPP_CXX03_LANG + +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::~wstring_convert() { + delete __cvtptr_; +} + +template +typename wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::wide_string +wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::from_bytes(const char* __frm, const char* __frm_end) { + _LIBCPP_SUPPRESS_DEPRECATED_POP + __cvtcount_ = 0; + if (__cvtptr_ != nullptr) { + wide_string __ws(2 * (__frm_end - __frm), _Elem()); + if (__frm != __frm_end) + __ws.resize(__ws.capacity()); + codecvt_base::result __r = codecvt_base::ok; + state_type __st = __cvtstate_; + if (__frm != __frm_end) { + _Elem* __to = std::addressof(__ws[0]); + _Elem* __to_end = __to + __ws.size(); + const char* __frm_nxt; + do { + _Elem* __to_nxt; + __r = __cvtptr_->in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); + __cvtcount_ += __frm_nxt - __frm; + if (__frm_nxt == __frm) { + __r = codecvt_base::error; + } else if (__r == codecvt_base::noconv) { + __ws.resize(__to - std::addressof(__ws[0])); + // This only gets executed if _Elem is char + __ws.append((const _Elem*)__frm, (const _Elem*)__frm_end); + __frm = __frm_nxt; + __r = codecvt_base::ok; + } else if (__r == codecvt_base::ok) { + __ws.resize(__to_nxt - std::addressof(__ws[0])); + __frm = __frm_nxt; + } else if (__r == codecvt_base::partial) { + ptrdiff_t __s = __to_nxt - std::addressof(__ws[0]); + __ws.resize(2 * __s); + __to = 
std::addressof(__ws[0]) + __s; + __to_end = std::addressof(__ws[0]) + __ws.size(); + __frm = __frm_nxt; + } + } while (__r == codecvt_base::partial && __frm_nxt < __frm_end); + } + if (__r == codecvt_base::ok) + return __ws; + } + + if (__wide_err_string_.empty()) + std::__throw_range_error("wstring_convert: from_bytes error"); + + return __wide_err_string_; +} + +template +typename wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::byte_string +wstring_convert<_Codecvt, _Elem, _WideAlloc, _ByteAlloc>::to_bytes(const _Elem* __frm, const _Elem* __frm_end) { + __cvtcount_ = 0; + if (__cvtptr_ != nullptr) { + byte_string __bs(2 * (__frm_end - __frm), char()); + if (__frm != __frm_end) + __bs.resize(__bs.capacity()); + codecvt_base::result __r = codecvt_base::ok; + state_type __st = __cvtstate_; + if (__frm != __frm_end) { + char* __to = std::addressof(__bs[0]); + char* __to_end = __to + __bs.size(); + const _Elem* __frm_nxt; + do { + char* __to_nxt; + __r = __cvtptr_->out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); + __cvtcount_ += __frm_nxt - __frm; + if (__frm_nxt == __frm) { + __r = codecvt_base::error; + } else if (__r == codecvt_base::noconv) { + __bs.resize(__to - std::addressof(__bs[0])); + // This only gets executed if _Elem is char + __bs.append((const char*)__frm, (const char*)__frm_end); + __frm = __frm_nxt; + __r = codecvt_base::ok; + } else if (__r == codecvt_base::ok) { + __bs.resize(__to_nxt - std::addressof(__bs[0])); + __frm = __frm_nxt; + } else if (__r == codecvt_base::partial) { + ptrdiff_t __s = __to_nxt - std::addressof(__bs[0]); + __bs.resize(2 * __s); + __to = std::addressof(__bs[0]) + __s; + __to_end = std::addressof(__bs[0]) + __bs.size(); + __frm = __frm_nxt; + } + } while (__r == codecvt_base::partial && __frm_nxt < __frm_end); + } + if (__r == codecvt_base::ok) { + size_t __s = __bs.size(); + __bs.resize(__bs.capacity()); + char* __to = std::addressof(__bs[0]) + __s; + char* __to_end = __to + __bs.size(); + do { + char* __to_nxt; + __r = __cvtptr_->unshift(__st, __to, __to_end, __to_nxt); + if (__r == codecvt_base::noconv) { + __bs.resize(__to - std::addressof(__bs[0])); + __r = codecvt_base::ok; + } else if (__r == codecvt_base::ok) { + __bs.resize(__to_nxt - std::addressof(__bs[0])); + } else if (__r == codecvt_base::partial) { + ptrdiff_t __sp = __to_nxt - std::addressof(__bs[0]); + __bs.resize(2 * __sp); + __to = std::addressof(__bs[0]) + __sp; + __to_end = std::addressof(__bs[0]) + __bs.size(); + } + } while (__r == codecvt_base::partial); + if (__r == codecvt_base::ok) + return __bs; + } + } + + if (__byte_err_string_.empty()) + std::__throw_range_error("wstring_convert: to_bytes error"); + + return __byte_err_string_; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +# endif // _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT) + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_WSTRING_CONVERT_H diff --git a/lib/libcxx/include/__log_hardening_failure b/lib/libcxx/include/__log_hardening_failure new file mode 100644 index 0000000000..d1805306f6 --- /dev/null +++ b/lib/libcxx/include/__log_hardening_failure @@ -0,0 +1,42 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOG_HARDENING_FAILURE +#define _LIBCPP___LOG_HARDENING_FAILURE + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +// Hardening logging is not available in the C++03 mode; moreover, it is currently only available in the experimental +// library. +#if _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC && !defined(_LIBCPP_CXX03_LANG) + +_LIBCPP_BEGIN_NAMESPACE_STD + +// This function should never be called directly from the code -- it should only be called through the +// `_LIBCPP_LOG_HARDENING_FAILURE` macro. +[[__gnu__::__cold__]] _LIBCPP_EXPORTED_FROM_ABI void __log_hardening_failure(const char* __message) noexcept; + +// _LIBCPP_LOG_HARDENING_FAILURE(message) +// +// This macro is used to log an error without terminating the program (as is the case for hardening failures if the +// `observe` assertion semantic is used). + +# if !defined(_LIBCPP_LOG_HARDENING_FAILURE) +# define _LIBCPP_LOG_HARDENING_FAILURE(__message) ::std::__log_hardening_failure(__message) +# endif // !defined(_LIBCPP_LOG_HARDENING_FAILURE) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC && !defined(_LIBCPP_CXX03_LANG) + +#endif // _LIBCPP___LOG_HARDENING_FAILURE diff --git a/lib/libcxx/include/__math/abs.h b/lib/libcxx/include/__math/abs.h index fc3bf3a2c7..b780159f11 100644 --- a/lib/libcxx/include/__math/abs.h +++ b/lib/libcxx/include/__math/abs.h @@ -39,6 +39,30 @@ template ::value, int> = 0> return __builtin_fabs((double)__x); } +// abs + +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inline float abs(float __x) _NOEXCEPT { return __builtin_fabsf(__x); } +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inline double abs(double __x) _NOEXCEPT { return __builtin_fabs(__x); } + +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inline long double abs(long double __x) _NOEXCEPT { + return __builtin_fabsl(__x); +} + +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inline int abs(int __x) _NOEXCEPT { + return __builtin_abs(__x); +} + +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inline long abs(long __x) _NOEXCEPT { + return __builtin_labs(__x); +} + +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inline long long abs(long long __x) _NOEXCEPT { + return __builtin_llabs(__x); +} + } // namespace __math _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__math/copysign.h b/lib/libcxx/include/__math/copysign.h index c3ca6a3b03..4c297cb089 100644 --- a/lib/libcxx/include/__math/copysign.h +++ b/lib/libcxx/include/__math/copysign.h @@ -33,7 +33,7 @@ namespace __math { } template ::value && is_arithmetic<_A2>::value, int> = 0> -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type copysign(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> copysign(_A1 __x, _A2 __y) _NOEXCEPT { return ::__builtin_copysign(__x, __y); } diff --git a/lib/libcxx/include/__math/exponential_functions.h b/lib/libcxx/include/__math/exponential_functions.h index 109c334997..09930b7819 100644 --- a/lib/libcxx/include/__math/exponential_functions.h +++ b/lib/libcxx/include/__math/exponential_functions.h @@ -158,8 +158,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double pow(long double __x, long double __y) _ } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, 
_A2>::type pow(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> pow(_A1 __x, _A2 __y) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::pow((__result_type)__x, (__result_type)__y); } diff --git a/lib/libcxx/include/__math/fdim.h b/lib/libcxx/include/__math/fdim.h index dc1b4ecc07..a1081c7bde 100644 --- a/lib/libcxx/include/__math/fdim.h +++ b/lib/libcxx/include/__math/fdim.h @@ -35,8 +35,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double fdim(long double __x, long double __y) } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fdim(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> fdim(_A1 __x, _A2 __y) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fdim((__result_type)__x, (__result_type)__y); } diff --git a/lib/libcxx/include/__math/fma.h b/lib/libcxx/include/__math/fma.h index 6ba7a5a2d2..b972d85b89 100644 --- a/lib/libcxx/include/__math/fma.h +++ b/lib/libcxx/include/__math/fma.h @@ -40,8 +40,8 @@ template ::value && is_arithmetic<_A2>::value && is_arithmetic<_A3>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2, _A3>::type fma(_A1 __x, _A2 __y, _A3 __z) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2, _A3>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2, _A3> fma(_A1 __x, _A2 __y, _A3 __z) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2, _A3>; static_assert( !(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value && _IsSame<_A3, __result_type>::value), ""); diff --git a/lib/libcxx/include/__math/hypot.h b/lib/libcxx/include/__math/hypot.h index b2bf8e11c8..8e8c35b4a4 100644 --- a/lib/libcxx/include/__math/hypot.h +++ b/lib/libcxx/include/__math/hypot.h @@ -43,8 +43,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double hypot(long double __x, long double __y) } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type hypot(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> hypot(_A1 __x, _A2 __y) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::hypot((__result_type)__x, (__result_type)__y); } @@ -91,8 +91,8 @@ template && is_arithmetic_v<_A2> && is_arithmetic_v<_A3>, int> = 0 > -_LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2, _A3>::type hypot(_A1 __x, _A2 __y, _A3 __z) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2, _A3>::type; +_LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2, _A3> hypot(_A1 __x, _A2 __y, _A3 __z) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2, _A3>; static_assert(!( std::is_same_v<_A1, __result_type> && std::is_same_v<_A2, __result_type> && std::is_same_v<_A3, __result_type>)); return __math::__hypot( diff --git a/lib/libcxx/include/__math/inverse_trigonometric_functions.h b/lib/libcxx/include/__math/inverse_trigonometric_functions.h index cd98b46a6a..409500278e 100644 --- 
a/lib/libcxx/include/__math/inverse_trigonometric_functions.h +++ b/lib/libcxx/include/__math/inverse_trigonometric_functions.h @@ -86,8 +86,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double atan2(long double __y, long double __x) } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type atan2(_A1 __y, _A2 __x) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> atan2(_A1 __y, _A2 __x) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::atan2((__result_type)__y, (__result_type)__x); } diff --git a/lib/libcxx/include/__math/min_max.h b/lib/libcxx/include/__math/min_max.h index db900c849e..1ddbb557d1 100644 --- a/lib/libcxx/include/__math/min_max.h +++ b/lib/libcxx/include/__math/min_max.h @@ -39,8 +39,8 @@ template } template ::value && is_arithmetic<_A2>::value, int> = 0> -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmax(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> fmax(_A1 __x, _A2 __y) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fmax((__result_type)__x, (__result_type)__y); } @@ -61,8 +61,8 @@ template } template ::value && is_arithmetic<_A2>::value, int> = 0> -[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmin(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> fmin(_A1 __x, _A2 __y) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fmin((__result_type)__x, (__result_type)__y); } diff --git a/lib/libcxx/include/__math/modulo.h b/lib/libcxx/include/__math/modulo.h index c8ea506f37..71405abb6b 100644 --- a/lib/libcxx/include/__math/modulo.h +++ b/lib/libcxx/include/__math/modulo.h @@ -37,8 +37,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double fmod(long double __x, long double __y) } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmod(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> fmod(_A1 __x, _A2 __y) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fmod((__result_type)__x, (__result_type)__y); } diff --git a/lib/libcxx/include/__math/remainder.h b/lib/libcxx/include/__math/remainder.h index 0adb7f3af5..39fb76af6b 100644 --- a/lib/libcxx/include/__math/remainder.h +++ b/lib/libcxx/include/__math/remainder.h @@ -37,8 +37,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double remainder(long double __x, long double } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type remainder(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> remainder(_A1 __x, _A2 __y) _NOEXCEPT { + using 
__result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::remainder((__result_type)__x, (__result_type)__y); } @@ -59,8 +59,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double remquo(long double __x, long double __y } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type remquo(_A1 __x, _A2 __y, int* __z) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> remquo(_A1 __x, _A2 __y, int* __z) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::remquo((__result_type)__x, (__result_type)__y, __z); } diff --git a/lib/libcxx/include/__math/rounding_functions.h b/lib/libcxx/include/__math/rounding_functions.h index 474f585a62..aadeb395fa 100644 --- a/lib/libcxx/include/__math/rounding_functions.h +++ b/lib/libcxx/include/__math/rounding_functions.h @@ -158,8 +158,8 @@ inline _LIBCPP_HIDE_FROM_ABI long double nextafter(long double __x, long double } template ::value && is_arithmetic<_A2>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type nextafter(_A1 __x, _A2 __y) _NOEXCEPT { - using __result_type = typename __promote<_A1, _A2>::type; +inline _LIBCPP_HIDE_FROM_ABI __promote_t<_A1, _A2> nextafter(_A1 __x, _A2 __y) _NOEXCEPT { + using __result_type = __promote_t<_A1, _A2>; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::nextafter((__result_type)__x, (__result_type)__y); } diff --git a/lib/libcxx/include/__math/traits.h b/lib/libcxx/include/__math/traits.h index 0c96f766a7..4a6e58c6da 100644 --- a/lib/libcxx/include/__math/traits.h +++ b/lib/libcxx/include/__math/traits.h @@ -13,7 +13,6 @@ #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_integral.h> -#include <__type_traits/is_signed.h> #include <__type_traits/promote.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -51,16 +50,11 @@ template return __builtin_signbit(__x); } -template ::value && is_signed<_A1>::value, int> = 0> +template ::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { return __x < 0; } -template ::value && !is_signed<_A1>::value, int> = 0> -[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1) _NOEXCEPT { - return false; -} - // isfinite template ::value, int> = 0> @@ -151,7 +145,7 @@ template ::value, int> = 0> template ::value && is_arithmetic<_A2>::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) _NOEXCEPT { - using type = typename __promote<_A1, _A2>::type; + using type = __promote_t<_A1, _A2>; return __builtin_isgreater((type)__x, (type)__y); } @@ -159,7 +153,7 @@ template ::value && is_ar template ::value && is_arithmetic<_A2>::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) _NOEXCEPT { - using type = typename __promote<_A1, _A2>::type; + using type = __promote_t<_A1, _A2>; return __builtin_isgreaterequal((type)__x, (type)__y); } @@ -167,7 +161,7 @@ template ::value && is_ar template ::value && is_arithmetic<_A2>::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NOEXCEPT { - 
using type = typename __promote<_A1, _A2>::type; + using type = __promote_t<_A1, _A2>; return __builtin_isless((type)__x, (type)__y); } @@ -175,7 +169,7 @@ template ::value && is_ar template ::value && is_arithmetic<_A2>::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) _NOEXCEPT { - using type = typename __promote<_A1, _A2>::type; + using type = __promote_t<_A1, _A2>; return __builtin_islessequal((type)__x, (type)__y); } @@ -183,7 +177,7 @@ template ::value && is_ar template ::value && is_arithmetic<_A2>::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) _NOEXCEPT { - using type = typename __promote<_A1, _A2>::type; + using type = __promote_t<_A1, _A2>; return __builtin_islessgreater((type)__x, (type)__y); } @@ -191,7 +185,7 @@ template ::value && is_ar template ::value && is_arithmetic<_A2>::value, int> = 0> [[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) _NOEXCEPT { - using type = typename __promote<_A1, _A2>::type; + using type = __promote_t<_A1, _A2>; return __builtin_isunordered((type)__x, (type)__y); } diff --git a/lib/libcxx/include/__mbstate_t.h b/lib/libcxx/include/__mbstate_t.h index e013384454..c23ea7113c 100644 --- a/lib/libcxx/include/__mbstate_t.h +++ b/lib/libcxx/include/__mbstate_t.h @@ -43,12 +43,12 @@ # include // works on most Unixes #elif __has_include() # include // works on Darwin -#elif _LIBCPP_HAS_WIDE_CHARACTERS && __has_include_next() -# include_next // fall back to the C standard provider of mbstate_t +#elif __has_include_next() +# include_next // use the C standard provider of mbstate_t if present #elif __has_include_next() -# include_next // is also required to make mbstate_t visible +# include_next // Try in absence of for mbstate_t #else -# error "We don't know how to get the definition of mbstate_t without on your platform." +# error "We don't know how to get the definition of mbstate_t on your platform." #endif #endif // _LIBCPP___MBSTATE_T_H diff --git a/lib/libcxx/include/__mdspan/aligned_accessor.h b/lib/libcxx/include/__mdspan/aligned_accessor.h new file mode 100644 index 0000000000..2b8d4c52d8 --- /dev/null +++ b/lib/libcxx/include/__mdspan/aligned_accessor.h @@ -0,0 +1,87 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___MDSPAN_ALIGNED_ACCESSOR_H +#define _LIBCPP___MDSPAN_ALIGNED_ACCESSOR_H + +#include <__config> +#include <__cstddef/size_t.h> +#include <__mdspan/default_accessor.h> +#include <__memory/assume_aligned.h> +#include <__type_traits/is_abstract.h> +#include <__type_traits/is_array.h> +#include <__type_traits/is_convertible.h> +#include <__type_traits/remove_const.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 26 + +template +struct aligned_accessor { + static_assert(_ByteAlignment != 0 && (_ByteAlignment & (_ByteAlignment - 1)) == 0, + "aligned_accessor: byte alignment must be a power of two"); + static_assert(_ByteAlignment >= alignof(_ElementType), "aligned_accessor: insufficient byte alignment"); + static_assert(!is_array_v<_ElementType>, "aligned_accessor: template argument may not be an array type"); + static_assert(!is_abstract_v<_ElementType>, "aligned_accessor: template argument may not be an abstract class"); + + using offset_policy = default_accessor<_ElementType>; + using element_type = _ElementType; + using reference = _ElementType&; + using data_handle_type = _ElementType*; + + static constexpr size_t byte_alignment = _ByteAlignment; + + _LIBCPP_HIDE_FROM_ABI constexpr aligned_accessor() noexcept = default; + + template + requires(is_convertible_v<_OtherElementType (*)[], element_type (*)[]> && _OtherByteAlignment >= byte_alignment) + _LIBCPP_HIDE_FROM_ABI constexpr aligned_accessor(aligned_accessor<_OtherElementType, _OtherByteAlignment>) noexcept {} + + template + requires(is_convertible_v<_OtherElementType (*)[], element_type (*)[]>) + _LIBCPP_HIDE_FROM_ABI explicit constexpr aligned_accessor(default_accessor<_OtherElementType>) noexcept {} + + template + requires(is_convertible_v) + _LIBCPP_HIDE_FROM_ABI constexpr operator default_accessor<_OtherElementType>() const noexcept { + return {}; + } + + _LIBCPP_HIDE_FROM_ABI constexpr reference access(data_handle_type __p, size_t __i) const noexcept { + return std::assume_aligned(__p)[__i]; + } + + _LIBCPP_HIDE_FROM_ABI constexpr typename offset_policy::data_handle_type + offset(data_handle_type __p, size_t __i) const noexcept { + return std::assume_aligned(__p) + __i; + } +}; + +#endif // _LIBCPP_STD_VER >= 26 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___MDSPAN_ALIGNED_ACCESSOR_H diff --git a/lib/libcxx/include/__mdspan/extents.h b/lib/libcxx/include/__mdspan/extents.h index 65a697769b..99b54badf8 100644 --- a/lib/libcxx/include/__mdspan/extents.h +++ b/lib/libcxx/include/__mdspan/extents.h @@ -21,11 +21,10 @@ #include <__config> #include <__concepts/arithmetic.h> -#include <__cstddef/byte.h> #include <__type_traits/common_type.h> +#include <__type_traits/integer_traits.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_same.h> #include <__type_traits/make_unsigned.h> #include <__utility/integer_sequence.h> #include <__utility/unreachable.h> @@ -283,7 +282,8 @@ public: using size_type = make_unsigned_t; using rank_type = size_t; - static_assert(__libcpp_integer, "extents::index_type must be a signed or unsigned integer type"); + static_assert(__signed_or_unsigned_integer, + "extents::index_type must be a signed or unsigned integer type"); 
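// Illustrative usage sketch for the aligned_accessor policy introduced in
// <__mdspan/aligned_accessor.h> above (editor-added illustration, not part of the
// upstream sources; the buffer and variable names are hypothetical). The policy
// behaves like default_accessor except that every access first passes the data
// handle through std::assume_aligned<_ByteAlignment>, so the caller promises the
// mapped memory meets that alignment:
//
//   alignas(64) float buf[8 * 8] = {};
//   std::mdspan<float, std::extents<std::size_t, 8, 8>, std::layout_right,
//               std::aligned_accessor<float, 64>> m{buf};
//   float v = m[0, 0];   // access() returns std::assume_aligned<64>(buf)[index]
//
// Because aligned_accessor converts implicitly to default_accessor<_ElementType>,
// such an mdspan can also be handed to code written against the default policy.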
static_assert(((__mdspan_detail::__is_representable_as(_Extents) || (_Extents == dynamic_extent)) && ...), "extents ctor: arguments must be representable as index_type and nonnegative"); @@ -440,13 +440,13 @@ struct __make_dextents; template struct __make_dextents< _IndexType, _Rank, extents<_IndexType, _ExtentsPack...>> { - using type = + using type _LIBCPP_NODEBUG = typename __make_dextents< _IndexType, _Rank - 1, extents<_IndexType, dynamic_extent, _ExtentsPack...>>::type; }; template struct __make_dextents< _IndexType, 0, extents<_IndexType, _ExtentsPack...>> { - using type = extents<_IndexType, _ExtentsPack...>; + using type _LIBCPP_NODEBUG = extents<_IndexType, _ExtentsPack...>; }; } // namespace __mdspan_detail diff --git a/lib/libcxx/include/__mdspan/layout_left.h b/lib/libcxx/include/__mdspan/layout_left.h index 288b3dd803..2f515afb6c 100644 --- a/lib/libcxx/include/__mdspan/layout_left.h +++ b/lib/libcxx/include/__mdspan/layout_left.h @@ -21,6 +21,7 @@ #include <__config> #include <__fwd/mdspan.h> #include <__mdspan/extents.h> +#include <__memory/addressof.h> #include <__type_traits/common_type.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> @@ -58,7 +59,7 @@ private: index_type __prod = __ext.extent(0); for (rank_type __r = 1; __r < extents_type::rank(); __r++) { - bool __overflowed = __builtin_mul_overflow(__prod, __ext.extent(__r), &__prod); + bool __overflowed = __builtin_mul_overflow(__prod, __ext.extent(__r), std::addressof(__prod)); if (__overflowed) return false; } diff --git a/lib/libcxx/include/__mdspan/layout_right.h b/lib/libcxx/include/__mdspan/layout_right.h index 72922d1049..ccfbd23e28 100644 --- a/lib/libcxx/include/__mdspan/layout_right.h +++ b/lib/libcxx/include/__mdspan/layout_right.h @@ -22,6 +22,7 @@ #include <__cstddef/size_t.h> #include <__fwd/mdspan.h> #include <__mdspan/extents.h> +#include <__memory/addressof.h> #include <__type_traits/common_type.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> @@ -58,7 +59,7 @@ private: index_type __prod = __ext.extent(0); for (rank_type __r = 1; __r < extents_type::rank(); __r++) { - bool __overflowed = __builtin_mul_overflow(__prod, __ext.extent(__r), &__prod); + bool __overflowed = __builtin_mul_overflow(__prod, __ext.extent(__r), std::addressof(__prod)); if (__overflowed) return false; } diff --git a/lib/libcxx/include/__mdspan/layout_stride.h b/lib/libcxx/include/__mdspan/layout_stride.h index bb93de9775..9d77d71bc3 100644 --- a/lib/libcxx/include/__mdspan/layout_stride.h +++ b/lib/libcxx/include/__mdspan/layout_stride.h @@ -22,6 +22,7 @@ #include <__config> #include <__fwd/mdspan.h> #include <__mdspan/extents.h> +#include <__memory/addressof.h> #include <__type_traits/common_type.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> @@ -86,7 +87,7 @@ private: index_type __prod = __ext.extent(0); for (rank_type __r = 1; __r < __rank_; __r++) { - bool __overflowed = __builtin_mul_overflow(__prod, __ext.extent(__r), &__prod); + bool __overflowed = __builtin_mul_overflow(__prod, __ext.extent(__r), std::addressof(__prod)); if (__overflowed) return false; } @@ -109,11 +110,12 @@ private: } if (__ext.extent(__r) == static_cast(0)) return true; - index_type __prod = (__ext.extent(__r) - 1); - bool __overflowed_mul = __builtin_mul_overflow(__prod, static_cast(__strides[__r]), &__prod); + index_type __prod = (__ext.extent(__r) - 1); + bool __overflowed_mul = + __builtin_mul_overflow(__prod, 
static_cast(__strides[__r]), std::addressof(__prod)); if (__overflowed_mul) return false; - bool __overflowed_add = __builtin_add_overflow(__size, __prod, &__size); + bool __overflowed_add = __builtin_add_overflow(__size, __prod, std::addressof(__size)); if (__overflowed_add) return false; } diff --git a/lib/libcxx/include/__mdspan/mdspan.h b/lib/libcxx/include/__mdspan/mdspan.h index 3f9b35b185..c0f2767819 100644 --- a/lib/libcxx/include/__mdspan/mdspan.h +++ b/lib/libcxx/include/__mdspan/mdspan.h @@ -20,8 +20,10 @@ #include <__assert> #include <__config> #include <__fwd/mdspan.h> +#include <__mdspan/aligned_accessor.h> #include <__mdspan/default_accessor.h> #include <__mdspan/extents.h> +#include <__memory/addressof.h> #include <__type_traits/extent.h> #include <__type_traits/is_abstract.h> #include <__type_traits/is_array.h> @@ -215,7 +217,7 @@ public: _LIBCPP_ASSERT_UNCATEGORIZED( false == ([&](index_sequence<_Idxs...>) { size_type __prod = 1; - return (__builtin_mul_overflow(__prod, extent(_Idxs), &__prod) || ... || false); + return (__builtin_mul_overflow(__prod, extent(_Idxs), std::addressof(__prod)) || ... || false); }(make_index_sequence())), "mdspan: size() is not representable as size_type"); return [&](index_sequence<_Idxs...>) { @@ -266,13 +268,13 @@ private: # if _LIBCPP_STD_VER >= 26 template requires((is_convertible_v<_OtherIndexTypes, size_t> && ...) && (sizeof...(_OtherIndexTypes) > 0)) -explicit mdspan(_ElementType*, - _OtherIndexTypes...) -> mdspan<_ElementType, extents...>>; +explicit mdspan(_ElementType*, _OtherIndexTypes...) + -> mdspan<_ElementType, extents...>>; # else template requires((is_convertible_v<_OtherIndexTypes, size_t> && ...) && (sizeof...(_OtherIndexTypes) > 0)) -explicit mdspan(_ElementType*, - _OtherIndexTypes...) -> mdspan<_ElementType, dextents>; +explicit mdspan(_ElementType*, _OtherIndexTypes...) + -> mdspan<_ElementType, dextents>; # endif template diff --git a/lib/libcxx/include/__memory/addressof.h b/lib/libcxx/include/__memory/addressof.h index 98b08958a6..667071dfc6 100644 --- a/lib/libcxx/include/__memory/addressof.h +++ b/lib/libcxx/include/__memory/addressof.h @@ -23,7 +23,7 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp* a return __builtin_addressof(__x); } -#if _LIBCPP_HAS_OBJC_ARC +#if __has_feature(objc_arc) // Objective-C++ Automatic Reference Counting uses qualified pointers // that require special addressof() signatures. 
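// Illustrative note (not part of the upstream sources; example is hypothetical):
// under ARC a retainable object pointer carries an ownership qualifier, and a
// pointer to it must carry the matching qualifier, so the generic addressof(_Tp&)
// cannot be used here. The overloads below preserve the qualifier in the return
// type, e.g.:
//
//   __strong id obj = nil;
//   __strong id* p = std::addressof(obj);   // selects the __strong overload
//
// The __has_feature(objc_arc) / __has_feature(objc_arc_weak) guards ask the
// compiler directly whether ARC (and ARC weak references) are enabled for this
// translation unit, so these overloads are only declared when they can compile.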
template @@ -31,7 +31,7 @@ inline _LIBCPP_HIDE_FROM_ABI __strong _Tp* addressof(__strong _Tp& __x) _NOEXCEP return &__x; } -# if _LIBCPP_HAS_OBJC_ARC_WEAK +# if __has_feature(objc_arc_weak) template inline _LIBCPP_HIDE_FROM_ABI __weak _Tp* addressof(__weak _Tp& __x) _NOEXCEPT { return &__x; diff --git a/lib/libcxx/include/__memory/allocation_guard.h b/lib/libcxx/include/__memory/allocation_guard.h index 66edcd92ed..016e1a3a42 100644 --- a/lib/libcxx/include/__memory/allocation_guard.h +++ b/lib/libcxx/include/__memory/allocation_guard.h @@ -49,24 +49,26 @@ struct __allocation_guard { using _Size _LIBCPP_NODEBUG = typename allocator_traits<_Alloc>::size_type; template // we perform the allocator conversion inside the constructor - _LIBCPP_HIDE_FROM_ABI explicit __allocation_guard(_AllocT __alloc, _Size __n) + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __allocation_guard(_AllocT __alloc, _Size __n) : __alloc_(std::move(__alloc)), __n_(__n), __ptr_(allocator_traits<_Alloc>::allocate(__alloc_, __n_)) // initialization order is important {} - _LIBCPP_HIDE_FROM_ABI ~__allocation_guard() _NOEXCEPT { __destroy(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI ~__allocation_guard() _NOEXCEPT { __destroy(); } - _LIBCPP_HIDE_FROM_ABI __allocation_guard(const __allocation_guard&) = delete; - _LIBCPP_HIDE_FROM_ABI __allocation_guard(__allocation_guard&& __other) _NOEXCEPT + __allocation_guard(const __allocation_guard&) = delete; + __allocation_guard& operator=(const __allocation_guard& __other) = delete; + + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __allocation_guard(__allocation_guard&& __other) _NOEXCEPT : __alloc_(std::move(__other.__alloc_)), __n_(__other.__n_), __ptr_(__other.__ptr_) { __other.__ptr_ = nullptr; } - _LIBCPP_HIDE_FROM_ABI __allocation_guard& operator=(const __allocation_guard& __other) = delete; - _LIBCPP_HIDE_FROM_ABI __allocation_guard& operator=(__allocation_guard&& __other) _NOEXCEPT { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __allocation_guard& + operator=(__allocation_guard&& __other) _NOEXCEPT { if (std::addressof(__other) != this) { __destroy(); @@ -79,17 +81,17 @@ struct __allocation_guard { return *this; } - _LIBCPP_HIDE_FROM_ABI _Pointer + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI _Pointer __release_ptr() _NOEXCEPT { // not called __release() because it's a keyword in objective-c++ _Pointer __tmp = __ptr_; __ptr_ = nullptr; return __tmp; } - _LIBCPP_HIDE_FROM_ABI _Pointer __get() const _NOEXCEPT { return __ptr_; } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI _Pointer __get() const _NOEXCEPT { return __ptr_; } private: - _LIBCPP_HIDE_FROM_ABI void __destroy() _NOEXCEPT { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __destroy() _NOEXCEPT { if (__ptr_ != nullptr) { allocator_traits<_Alloc>::deallocate(__alloc_, __ptr_, __n_); } diff --git a/lib/libcxx/include/__memory/allocator.h b/lib/libcxx/include/__memory/allocator.h index 191a59e661..52f4122a9b 100644 --- a/lib/libcxx/include/__memory/allocator.h +++ b/lib/libcxx/include/__memory/allocator.h @@ -38,7 +38,7 @@ class allocator; // These specializations shouldn't be marked _LIBCPP_DEPRECATED_IN_CXX17. // Specializing allocator is deprecated, but not using it. template <> -class _LIBCPP_TEMPLATE_VIS allocator { +class allocator { public: _LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; @@ -77,7 +77,7 @@ struct __non_trivial_if { // allocator trivial in C++20. 
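// Illustrative note (editor-added; the names in the snippet are hypothetical):
// the primary std::allocator<_Tp> below obtains storage from ::operator new,
// throws bad_array_new_length when the requested element count exceeds
// max_size(), and its allocate()/deallocate() are usable in constant evaluation
// since C++20:
//
//   std::allocator<int> a;
//   int* p = a.allocate(3);   // may throw std::bad_alloc or std::bad_array_new_length
//   p[0] = 42;
//   a.deallocate(p, 3);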
template -class _LIBCPP_TEMPLATE_VIS allocator : private __non_trivial_if::value, allocator<_Tp> > { +class allocator : private __non_trivial_if::value, allocator<_Tp> > { static_assert(!is_const<_Tp>::value, "std::allocator does not support const types"); static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types"); @@ -98,7 +98,7 @@ public: [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* allocate(size_t __n) { static_assert(sizeof(_Tp) >= 0, "cannot allocate memory for an incomplete type"); if (__n > allocator_traits::max_size(*this)) - __throw_bad_array_new_length(); + std::__throw_bad_array_new_length(); if (__libcpp_is_constant_evaluated()) { return static_cast<_Tp*>(::operator new(__n * sizeof(_Tp))); } else { diff --git a/lib/libcxx/include/__memory/allocator_arg_t.h b/lib/libcxx/include/__memory/allocator_arg_t.h index 72a0a9c399..31a73fc455 100644 --- a/lib/libcxx/include/__memory/allocator_arg_t.h +++ b/lib/libcxx/include/__memory/allocator_arg_t.h @@ -23,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -struct _LIBCPP_TEMPLATE_VIS allocator_arg_t { +struct allocator_arg_t { explicit allocator_arg_t() = default; }; diff --git a/lib/libcxx/include/__memory/allocator_traits.h b/lib/libcxx/include/__memory/allocator_traits.h index 2d9ab847e9..46c247f704 100644 --- a/lib/libcxx/include/__memory/allocator_traits.h +++ b/lib/libcxx/include/__memory/allocator_traits.h @@ -36,12 +36,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -#define _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(NAME, PROPERTY) \ - template \ - struct NAME : false_type {}; \ - template \ - struct NAME<_Tp, __void_t > : true_type {} - +_LIBCPP_SUPPRESS_DEPRECATED_PUSH // __pointer template using __pointer_member _LIBCPP_NODEBUG = typename _Tp::pointer; @@ -49,50 +44,45 @@ using __pointer_member _LIBCPP_NODEBUG = typename _Tp::pointer; template using __pointer _LIBCPP_NODEBUG = __detected_or_t<_Tp*, __pointer_member, __libcpp_remove_reference_t<_Alloc> >; -// __const_pointer -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_const_pointer, const_pointer); -template ::value> -struct __const_pointer { - using type _LIBCPP_NODEBUG = typename _Alloc::const_pointer; -}; -template -struct __const_pointer<_Tp, _Ptr, _Alloc, false> { +// This trait returns _Alias<_Alloc> if that's well-formed, and _Ptr rebound to _Tp otherwise +template class _Alias, class _Ptr, class _Tp, class = void> +struct __rebind_or_alias_pointer { #ifdef _LIBCPP_CXX03_LANG - using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind::other; + using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind<_Tp>::other; #else - using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind; + using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind<_Tp>; #endif }; +template class _Alias> +struct __rebind_or_alias_pointer<_Alloc, _Alias, _Ptr, _Tp, __void_t<_Alias<_Alloc> > > { + using type _LIBCPP_NODEBUG = _Alias<_Alloc>; +}; + +// __const_pointer +template +using __const_pointer_member _LIBCPP_NODEBUG = typename _Alloc::const_pointer; + +template +using __const_pointer_t _LIBCPP_NODEBUG = + typename __rebind_or_alias_pointer<_Alloc, __const_pointer_member, _Ptr, const _Tp>::type; +_LIBCPP_SUPPRESS_DEPRECATED_POP + // __void_pointer -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_void_pointer, void_pointer); -template ::value> -struct __void_pointer { - using type _LIBCPP_NODEBUG = typename _Alloc::void_pointer; -}; +template +using __void_pointer_member _LIBCPP_NODEBUG = 
typename _Alloc::void_pointer; + template -struct __void_pointer<_Ptr, _Alloc, false> { -#ifdef _LIBCPP_CXX03_LANG - using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind::other; -#else - using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind; -#endif -}; +using __void_pointer_t _LIBCPP_NODEBUG = + typename __rebind_or_alias_pointer<_Alloc, __void_pointer_member, _Ptr, void>::type; // __const_void_pointer -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_const_void_pointer, const_void_pointer); -template ::value> -struct __const_void_pointer { - using type _LIBCPP_NODEBUG = typename _Alloc::const_void_pointer; -}; +template +using __const_void_pointer_member _LIBCPP_NODEBUG = typename _Alloc::const_void_pointer; + template -struct __const_void_pointer<_Ptr, _Alloc, false> { -#ifdef _LIBCPP_CXX03_LANG - using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind::other; -#else - using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind; -#endif -}; +using __const_void_pointer_t _LIBCPP_NODEBUG = + typename __rebind_or_alias_pointer<_Alloc, __const_void_pointer_member, _Ptr, const void>::type; // __size_type template @@ -102,13 +92,13 @@ template using __size_type _LIBCPP_NODEBUG = __detected_or_t<__make_unsigned_t<_DiffType>, __size_type_member, _Alloc>; // __alloc_traits_difference_type -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_alloc_traits_difference_type, difference_type); -template ::value> +template struct __alloc_traits_difference_type { using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::difference_type; }; + template -struct __alloc_traits_difference_type<_Alloc, _Ptr, true> { +struct __alloc_traits_difference_type<_Alloc, _Ptr, __void_t > { using type _LIBCPP_NODEBUG = typename _Alloc::difference_type; }; @@ -138,6 +128,7 @@ template using __propagate_on_container_swap _LIBCPP_NODEBUG = __detected_or_t; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH // __is_always_equal template using __is_always_equal_member _LIBCPP_NODEBUG = typename _Tp::is_always_equal; @@ -147,15 +138,14 @@ using __is_always_equal _LIBCPP_NODEBUG = __detected_or_t::type, __is_always_equal_member, _Alloc>; // __allocator_traits_rebind -_LIBCPP_SUPPRESS_DEPRECATED_PUSH template -struct __has_rebind_other : false_type {}; +inline const bool __has_rebind_other_v = false; template -struct __has_rebind_other<_Tp, _Up, __void_t::other> > : true_type {}; +inline const bool __has_rebind_other_v<_Tp, _Up, __void_t::other> > = true; -template ::value> +template > struct __allocator_traits_rebind { - static_assert(__has_rebind_other<_Tp, _Up>::value, "This allocator has to implement rebind"); + static_assert(__has_rebind_other_v<_Tp, _Up>, "This allocator has to implement rebind"); using type _LIBCPP_NODEBUG = typename _Tp::template rebind<_Up>::other; }; template