From 7051e794ceb6399429ab1b961a13e6876ea93943 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 27 Jan 2021 15:21:13 -0800 Subject: [PATCH 001/244] Drop the 'git' suffix from various version variables --- libcxx/CMakeLists.txt | 2 +- libcxxabi/CMakeLists.txt | 2 +- libunwind/CMakeLists.txt | 2 +- llvm/CMakeLists.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 4e7e8f978546..9bf1a02f0908 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -29,7 +29,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL project(libcxx CXX C) set(PACKAGE_NAME libcxx) - set(PACKAGE_VERSION 12.0.0git) + set(PACKAGE_VERSION 12.0.0) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index b803347c2a8e..426c855288fc 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -28,7 +28,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXXABI_STANDALONE_B project(libcxxabi CXX C) set(PACKAGE_NAME libcxxabi) - set(PACKAGE_VERSION 11.0.0git) + set(PACKAGE_VERSION 11.0.0) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 8ae32fbccf4e..48cb8e004e08 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -24,7 +24,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B project(libunwind LANGUAGES C CXX ASM) set(PACKAGE_NAME libunwind) - set(PACKAGE_VERSION 12.0.0git) + set(PACKAGE_VERSION 12.0.0) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 454ec561af9a..277d0fe54d7b 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -14,7 
+14,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH) set(LLVM_VERSION_PATCH 0) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) - set(LLVM_VERSION_SUFFIX git) + set(LLVM_VERSION_SUFFIX "") endif() if (NOT PACKAGE_VERSION) From f2a45d31b9c11f2b3e12f161391fe845025b5177 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 27 Jan 2021 15:17:48 -0800 Subject: [PATCH 002/244] Import workflows from release/11.x branch --- .github/workflows/clang-tests.yml | 43 +++++++++++ .github/workflows/libclc-tests.yml | 53 +++++++++++++ .github/workflows/lld-tests.yml | 43 +++++++++++ .github/workflows/lldb-tests.yml | 48 ++++++++++++ .github/workflows/llvm-tests.yml | 116 +++++++++++++++++++++++++++++ 5 files changed, 303 insertions(+) create mode 100644 .github/workflows/clang-tests.yml create mode 100644 .github/workflows/libclc-tests.yml create mode 100644 .github/workflows/lld-tests.yml create mode 100644 .github/workflows/lldb-tests.yml create mode 100644 .github/workflows/llvm-tests.yml diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml new file mode 100644 index 000000000000..f8ca65e10726 --- /dev/null +++ b/.github/workflows/clang-tests.yml @@ -0,0 +1,43 @@ +name: Clang Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - 'llvm/**' + - '.github/workflows/clang-tests.yml' + pull_request: + paths: + - 'clang/**' + - 'llvm/**' + - '.github/workflows/clang-tests.yml' + +jobs: + build_clang: + name: clang check-all + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + - macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Test clang + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" 
-DCMAKE_BUILD_TYPE=Release + build_target: check-clang diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml new file mode 100644 index 000000000000..4e8639b1c89a --- /dev/null +++ b/.github/workflows/libclc-tests.yml @@ -0,0 +1,53 @@ +name: libclc Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - 'llvm/**' + - 'libclc/**' + - '.github/workflows/libclc-tests.yml' + pull_request: + paths: + - 'clang/**' + - 'llvm/**' + - 'libclc/**' + - '.github/workflows/libclc-tests.yml' + +jobs: + build_libclc: + name: libclc test + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + # Disable build on windows, because I can't figure out where llvm-config is. + #- windows-latest + - macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Build clang + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release + build_target: "" + - name: Build and test libclc + run: | + mkdir libclc-build + cd libclc-build + cmake -G Ninja ../libclc -DLLVM_CONFIG=../build/bin/llvm-config + ninja + ninja test diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml new file mode 100644 index 000000000000..9b4cbe95f231 --- /dev/null +++ b/.github/workflows/lld-tests.yml @@ -0,0 +1,43 @@ +name: LLD Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'lld/**' + - 'llvm/**' + - '.github/workflows/lld-tests.yml' + pull_request: + paths: + - 'lld/**' + - 'llvm/**' + - '.github/workflows/lld-tests.yml' + +jobs: + build_lld: + name: lld check-all + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + - macOS-latest + 
steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Test lld + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="lld" -DCMAKE_BUILD_TYPE=Release + build_target: check-lld diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml new file mode 100644 index 000000000000..229e6deece6e --- /dev/null +++ b/.github/workflows/lldb-tests.yml @@ -0,0 +1,48 @@ +name: lldb Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - 'llvm/**' + - 'lldb/**' + - '.github/workflows/lldb-tests.yml' + pull_request: + paths: + - 'clang/**' + - 'llvm/**' + - 'lldb/**' + - '.github/workflows/lldb-tests.yml' + +jobs: + build_lldb: + name: lldb build + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + # macOS build disabled due to: llvm.org/PR46190 + #- macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Build lldb + uses: llvm/actions/build-test-llvm-project@master + with: + # Mac OS requries that libcxx is enabled for lldb tests, so we need to disable them. + cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang;lldb" -DCMAKE_BUILD_TYPE=Release -DLLDB_INCLUDE_TESTS=OFF + # check-lldb is not consistent, so we only build lldb. 
+ build_target: "" diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml new file mode 100644 index 000000000000..67f318ad849f --- /dev/null +++ b/.github/workflows/llvm-tests.yml @@ -0,0 +1,116 @@ +name: LLVM Tests + +env: + release_major: 12 + +on: + push: + branches: + - 'release/**' + paths: + - 'llvm/**' + - '.github/workflows/llvm-tests.yml' + pull_request: + paths: + - 'llvm/**' + - '.github/workflows/llvm-tests.yml' + +jobs: + build_llvm: + name: llvm check-all + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + - macOS-latest + steps: + - name: Setup Windows + if: startsWith(matrix.os, 'windows') + uses: llvm/actions/setup-windows@master + with: + arch: amd64 + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Test llvm + uses: llvm/actions/build-test-llvm-project@master + with: + cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release + + abi-dump: + runs-on: ubuntu-latest + strategy: + matrix: + name: + - build-baseline + - build-latest + include: + - name: build-baseline + # FIXME: Referencing the env context does not work here + # ref: llvmorg-${{ env.release_major }}.0.0 + ref: llvmorg-12.0.0 + repo: llvm/llvm-project + - name: build-latest + ref: ${{ github.sha }} + repo: ${{ github.repository }} + steps: + - name: Install Ninja + uses: llvm/actions/install-ninja@master + - name: Install abi-compliance-checker + run: | + sudo apt-get install abi-dumper autoconf pkg-config + - name: Install universal-ctags + run: | + git clone https://github.com/universal-ctags/ctags.git + cd ctags + ./autogen.sh + ./configure + sudo make install + - name: Download source code + uses: llvm/actions/get-llvm-project-src@master + with: + ref: ${{ matrix.ref }} + repo: ${{ matrix.repo }} + - name: Configure + run: | + mkdir build + cd build + cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" 
-DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" ../llvm + - name: Build + run: ninja -C build libLLVM-${{ env.release_major }}.so + - name: Dump ABI + run: abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers llvm/include -o ${{ matrix.ref }}.abi.tar.gz build/lib/libLLVM-${{ env.release_major }}.so + - name: Upload ABI file + uses: actions/upload-artifact@v1 + with: + name: ${{ matrix.name }} + path: ${{ matrix.ref }}.abi.tar.gz + + abi-compare: + runs-on: ubuntu-latest + needs: + - abi-dump + steps: + - name: Download baseline + uses: actions/download-artifact@v1 + with: + name: build-baseline + - name: Download latest + uses: actions/download-artifact@v1 + with: + name: build-latest + - name: Install abi-compliance-checker + run: sudo apt-get install abi-compliance-checker + - name: Compare ABI + run: abi-compliance-checker -l libLLVM-${{ env.release_major}}.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz + - name: Upload ABI Comparison + if: always() + uses: actions/upload-artifact@v1 + with: + name: compat-report-${{ github.sha }} + path: compat_reports/ From d64226e8fab8fc7b4d947223c61036a60eb6a871 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 27 Jan 2021 15:32:05 +0100 Subject: [PATCH 003/244] [clangd] Work around GCC bug 66735 (cherry picked from commit 12de8e1399fecf691639ba430b3824acb1311e70) --- clang-tools-extra/clangd/ParsedAST.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 403d3fe3e64f..1020282f5ee8 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -316,8 +316,8 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, Check->registerMatchers(&CTFinder); } - ASTDiags.setLevelAdjuster([&, &Cfg(Config::current())]( - DiagnosticsEngine::Level DiagLevel, + const Config& Cfg = Config::current(); + 
ASTDiags.setLevelAdjuster([&](DiagnosticsEngine::Level DiagLevel, const clang::Diagnostic &Info) { if (Cfg.Diagnostics.SuppressAll || isBuiltinDiagnosticSuppressed(Info.getID(), Cfg.Diagnostics.Suppress)) From ea99c885a63de9af673a5e5cd51f44fb70c83c1b Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 27 Jan 2021 12:24:30 -0800 Subject: [PATCH 004/244] Permit __VA_OPT__ in all language modes and allow it to be detected with #ifdef. These changes are intended to give code a path to move away from the GNU ,##__VA_ARGS__ extension, which is non-conforming in some situations and which we'd like to disable in our conforming mode in those cases. (cherry picked from commit 0436ec2128c9775ba13b0308937238fc79673fdd) --- clang/include/clang/Lex/Preprocessor.h | 19 ++++++++++++ .../include/clang/Lex/VariadicMacroSupport.h | 10 ++---- clang/lib/Lex/PPDirectives.cpp | 5 +++ clang/lib/Lex/PPExpressions.cpp | 5 +++ clang/lib/Lex/PPMacroExpansion.cpp | 6 +++- clang/lib/Lex/Preprocessor.cpp | 19 +++++------- clang/test/Preprocessor/macro_vaopt_check.cpp | 31 ++++++++++++++++++- .../test/Preprocessor/macro_vaopt_expand.cpp | 4 ++- 8 files changed, 78 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 68139cb24b31..ba8bdaa23c4c 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -447,6 +447,25 @@ class Preprocessor { ElseLoc(ElseLoc) {} }; + class IfdefMacroNameScopeRAII { + Preprocessor &PP; + bool VAOPTWasPoisoned; + + public: + IfdefMacroNameScopeRAII(Preprocessor &PP) + : PP(PP), VAOPTWasPoisoned(PP.Ident__VA_OPT__->isPoisoned()) { + PP.Ident__VA_OPT__->setIsPoisoned(false); + } + IfdefMacroNameScopeRAII(const IfdefMacroNameScopeRAII&) = delete; + IfdefMacroNameScopeRAII &operator=(const IfdefMacroNameScopeRAII&) = delete; + ~IfdefMacroNameScopeRAII() { Exit(); } + + void Exit() { + if (VAOPTWasPoisoned) + PP.Ident__VA_OPT__->setIsPoisoned(true); + } 
+ }; + private: friend class ASTReader; friend class MacroArgs; diff --git a/clang/include/clang/Lex/VariadicMacroSupport.h b/clang/include/clang/Lex/VariadicMacroSupport.h index 989e0ac703c9..119f02201fc6 100644 --- a/clang/include/clang/Lex/VariadicMacroSupport.h +++ b/clang/include/clang/Lex/VariadicMacroSupport.h @@ -39,17 +39,14 @@ namespace clang { assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned " "outside an ISO C/C++ variadic " "macro definition!"); - assert( - !Ident__VA_OPT__ || - (Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!")); + assert(Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!"); } /// Client code should call this function just before the Preprocessor is /// about to Lex tokens from the definition of a variadic (ISO C/C++) macro. void enterScope() { Ident__VA_ARGS__->setIsPoisoned(false); - if (Ident__VA_OPT__) - Ident__VA_OPT__->setIsPoisoned(false); + Ident__VA_OPT__->setIsPoisoned(false); } /// Client code should call this function as soon as the Preprocessor has @@ -58,8 +55,7 @@ namespace clang { /// (might be explicitly called, and then reinvoked via the destructor). void exitScope() { Ident__VA_ARGS__->setIsPoisoned(true); - if (Ident__VA_OPT__) - Ident__VA_OPT__->setIsPoisoned(true); + Ident__VA_OPT__->setIsPoisoned(true); } ~VariadicMacroScopeGuard() { exitScope(); } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index d6b03d85913d..e2aa93455ea5 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -2928,9 +2928,14 @@ void Preprocessor::HandleIfdefDirective(Token &Result, ++NumIf; Token DirectiveTok = Result; + // __VA_OPT__ is allowed as the operand of #if[n]def. + IfdefMacroNameScopeRAII IfdefMacroNameScope(*this); + Token MacroNameTok; ReadMacroName(MacroNameTok); + IfdefMacroNameScope.Exit(); + // Error reading macro name? If so, diagnostic already issued. 
if (MacroNameTok.is(tok::eod)) { // Skip code until we get to #endif. This helps with recovery by not diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 8c120c13d7d2..952fb8f121dc 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -104,6 +104,9 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, SourceLocation beginLoc(PeekTok.getLocation()); Result.setBegin(beginLoc); + // __VA_OPT__ is allowed as the operand of 'defined'. + Preprocessor::IfdefMacroNameScopeRAII IfdefMacroNameScope(PP); + // Get the next token, don't expand it. PP.LexUnexpandedNonComment(PeekTok); @@ -122,6 +125,8 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.LexUnexpandedNonComment(PeekTok); } + IfdefMacroNameScope.Exit(); + // If we don't have a pp-identifier now, this is an error. if (PP.CheckMacroName(PeekTok, MU_Other)) return true; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 43d31d6c5732..f6ca04defeb9 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -323,13 +323,16 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) { /// RegisterBuiltinMacro - Register the specified identifier in the identifier /// table and mark it as a builtin macro to be expanded. -static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){ +static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name, + bool Disabled = false) { // Get the identifier. IdentifierInfo *Id = PP.getIdentifierInfo(Name); // Mark it as being a macro that is builtin. 
MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation()); MI->setIsBuiltinMacro(); + if (Disabled) + MI->DisableMacro(); PP.appendDefMacroDirective(Id, MI); return Id; } @@ -343,6 +346,7 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__"); Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__"); Ident_Pragma = RegisterBuiltinMacro(*this, "_Pragma"); + Ident__VA_OPT__ = RegisterBuiltinMacro(*this, "__VA_OPT__", true); // C++ Standing Document Extensions. if (getLangOpts().CPlusPlus) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 94f1ce91f884..9baba204b324 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -115,23 +115,20 @@ Preprocessor::Preprocessor(std::shared_ptr PPOpts, BuiltinInfo = std::make_unique(); - // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of - // a macro. They get unpoisoned where it is allowed. - (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); - SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); - if (getLangOpts().CPlusPlus20) { - (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); - SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); - } else { - Ident__VA_OPT__ = nullptr; - } - // Initialize the pragma handlers. RegisterBuiltinPragmas(); // Initialize builtin macros like __LINE__ and friends. RegisterBuiltinMacros(); + // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of + // a macro. They get unpoisoned where it is allowed. Note that we model + // __VA_OPT__ as a builtin macro to allow #ifdef and friends to detect it. 
+ (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use); + Ident__VA_OPT__->setIsPoisoned(); + SetPoisonReason(Ident__VA_OPT__, diag::ext_pp_bad_vaopt_use); + if(LangOpts.Borland) { Ident__exception_info = getIdentifierInfo("_exception_info"); Ident___exception_info = getIdentifierInfo("__exception_info"); diff --git a/clang/test/Preprocessor/macro_vaopt_check.cpp b/clang/test/Preprocessor/macro_vaopt_check.cpp index fb52e9946af3..84f3b85871dd 100644 --- a/clang/test/Preprocessor/macro_vaopt_check.cpp +++ b/clang/test/Preprocessor/macro_vaopt_check.cpp @@ -1,4 +1,20 @@ -// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++2a +// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++20 +// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++11 +// RUN: %clang_cc1 -x c %s -Eonly -verify -Wno-all -pedantic -std=c99 + +// Check that support for __VA_OPT__ can be detected by #ifdef. +#ifndef __VA_OPT__ +#error should be defined +#endif + +#ifdef __VA_OPT__ +#else +#error should be defined +#endif + +#if !defined(__VA_OPT__) +#error should be defined +#endif //expected-error@+1{{missing '('}} #define V1(...) __VA_OPT__ @@ -62,3 +78,16 @@ #define V1(...) __VA_OPT__ ((()) #undef V1 +// __VA_OPT__ can't appear anywhere else. +#if __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} +#endif + +#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} + +// Check defined(__VA_OPT__) doesn't leave __VA_OPT__ poisoned. +#define Z(...) 
(0 __VA_OPT__(|| 1)) +#if defined(__VA_OPT__) && Z(hello) +// OK +#else +#error bad +#endif diff --git a/clang/test/Preprocessor/macro_vaopt_expand.cpp b/clang/test/Preprocessor/macro_vaopt_expand.cpp index 7ec4f6128cfa..5eb0facb83f7 100644 --- a/clang/test/Preprocessor/macro_vaopt_expand.cpp +++ b/clang/test/Preprocessor/macro_vaopt_expand.cpp @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -E %s -pedantic -std=c++2a | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E %s -pedantic -std=c++20 | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E %s -pedantic -std=c++11 | FileCheck -strict-whitespace %s +// RUN: %clang_cc1 -E -x c %s -pedantic -std=c99 | FileCheck -strict-whitespace %s #define LPAREN ( #define RPAREN ) From 9ea2a107ca4055a3a4960cb6dffb84b7f43bd8ea Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 27 Jan 2021 13:14:02 -0800 Subject: [PATCH 005/244] Don't allow __VA_OPT__ to be detected by #ifdef. More study has discovered this to not actually be useful: because current C++20 implementations reject `#ifdef __VA_OPT__`, this can't really be used as a feature-test mechanism. And it's not too hard to detect __VA_OPT__ without this, for example: #define THIRD_ARG(a, b, c, ...) c #define HAS_VA_OPT(...) THIRD_ARG(__VA_OPT__(,), 1, 0, ) #if HAS_VA_OPT(?) Partially reverts 0436ec2128c9775ba13b0308937238fc79673fdd. 
(cherry picked from commit 5dfa37a76153f2a18ac7fe30721cc1332b672ea2) --- clang/include/clang/Lex/Preprocessor.h | 19 -------------- clang/lib/Lex/PPDirectives.cpp | 5 ---- clang/lib/Lex/PPExpressions.cpp | 5 ---- clang/lib/Lex/PPMacroExpansion.cpp | 6 +---- clang/lib/Lex/Preprocessor.cpp | 15 ++++++----- clang/test/Preprocessor/macro_vaopt_check.cpp | 25 +++---------------- 6 files changed, 11 insertions(+), 64 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index ba8bdaa23c4c..68139cb24b31 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -447,25 +447,6 @@ class Preprocessor { ElseLoc(ElseLoc) {} }; - class IfdefMacroNameScopeRAII { - Preprocessor &PP; - bool VAOPTWasPoisoned; - - public: - IfdefMacroNameScopeRAII(Preprocessor &PP) - : PP(PP), VAOPTWasPoisoned(PP.Ident__VA_OPT__->isPoisoned()) { - PP.Ident__VA_OPT__->setIsPoisoned(false); - } - IfdefMacroNameScopeRAII(const IfdefMacroNameScopeRAII&) = delete; - IfdefMacroNameScopeRAII &operator=(const IfdefMacroNameScopeRAII&) = delete; - ~IfdefMacroNameScopeRAII() { Exit(); } - - void Exit() { - if (VAOPTWasPoisoned) - PP.Ident__VA_OPT__->setIsPoisoned(true); - } - }; - private: friend class ASTReader; friend class MacroArgs; diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index e2aa93455ea5..d6b03d85913d 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -2928,14 +2928,9 @@ void Preprocessor::HandleIfdefDirective(Token &Result, ++NumIf; Token DirectiveTok = Result; - // __VA_OPT__ is allowed as the operand of #if[n]def. - IfdefMacroNameScopeRAII IfdefMacroNameScope(*this); - Token MacroNameTok; ReadMacroName(MacroNameTok); - IfdefMacroNameScope.Exit(); - // Error reading macro name? If so, diagnostic already issued. if (MacroNameTok.is(tok::eod)) { // Skip code until we get to #endif. 
This helps with recovery by not diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 952fb8f121dc..8c120c13d7d2 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -104,9 +104,6 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, SourceLocation beginLoc(PeekTok.getLocation()); Result.setBegin(beginLoc); - // __VA_OPT__ is allowed as the operand of 'defined'. - Preprocessor::IfdefMacroNameScopeRAII IfdefMacroNameScope(PP); - // Get the next token, don't expand it. PP.LexUnexpandedNonComment(PeekTok); @@ -125,8 +122,6 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT, PP.LexUnexpandedNonComment(PeekTok); } - IfdefMacroNameScope.Exit(); - // If we don't have a pp-identifier now, this is an error. if (PP.CheckMacroName(PeekTok, MU_Other)) return true; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index f6ca04defeb9..43d31d6c5732 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -323,16 +323,13 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) { /// RegisterBuiltinMacro - Register the specified identifier in the identifier /// table and mark it as a builtin macro to be expanded. -static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name, - bool Disabled = false) { +static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){ // Get the identifier. IdentifierInfo *Id = PP.getIdentifierInfo(Name); // Mark it as being a macro that is builtin. 
MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation()); MI->setIsBuiltinMacro(); - if (Disabled) - MI->DisableMacro(); PP.appendDefMacroDirective(Id, MI); return Id; } @@ -346,7 +343,6 @@ void Preprocessor::RegisterBuiltinMacros() { Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__"); Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__"); Ident_Pragma = RegisterBuiltinMacro(*this, "_Pragma"); - Ident__VA_OPT__ = RegisterBuiltinMacro(*this, "__VA_OPT__", true); // C++ Standing Document Extensions. if (getLangOpts().CPlusPlus) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 9baba204b324..177786d90390 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -115,20 +115,19 @@ Preprocessor::Preprocessor(std::shared_ptr PPOpts, BuiltinInfo = std::make_unique(); + // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of + // a macro. They get unpoisoned where it is allowed. + (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use); + (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned(); + SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use); + // Initialize the pragma handlers. RegisterBuiltinPragmas(); // Initialize builtin macros like __LINE__ and friends. RegisterBuiltinMacros(); - // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of - // a macro. They get unpoisoned where it is allowed. Note that we model - // __VA_OPT__ as a builtin macro to allow #ifdef and friends to detect it. 
- (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); - SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use); - Ident__VA_OPT__->setIsPoisoned(); - SetPoisonReason(Ident__VA_OPT__, diag::ext_pp_bad_vaopt_use); - if(LangOpts.Borland) { Ident__exception_info = getIdentifierInfo("_exception_info"); Ident___exception_info = getIdentifierInfo("__exception_info"); diff --git a/clang/test/Preprocessor/macro_vaopt_check.cpp b/clang/test/Preprocessor/macro_vaopt_check.cpp index 84f3b85871dd..c5c0ac518bc0 100644 --- a/clang/test/Preprocessor/macro_vaopt_check.cpp +++ b/clang/test/Preprocessor/macro_vaopt_check.cpp @@ -2,20 +2,6 @@ // RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++11 // RUN: %clang_cc1 -x c %s -Eonly -verify -Wno-all -pedantic -std=c99 -// Check that support for __VA_OPT__ can be detected by #ifdef. -#ifndef __VA_OPT__ -#error should be defined -#endif - -#ifdef __VA_OPT__ -#else -#error should be defined -#endif - -#if !defined(__VA_OPT__) -#error should be defined -#endif - //expected-error@+1{{missing '('}} #define V1(...) __VA_OPT__ #undef V1 @@ -82,12 +68,7 @@ #if __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} #endif -#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} - -// Check defined(__VA_OPT__) doesn't leave __VA_OPT__ poisoned. -#define Z(...) 
(0 __VA_OPT__(|| 1)) -#if defined(__VA_OPT__) && Z(hello) -// OK -#else -#error bad +#ifdef __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} #endif + +#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}} From 9df2b64fc5fa911ca59b3f646806ca3fd6787c2d Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 27 Jan 2021 16:07:51 -0800 Subject: [PATCH 006/244] [cxx_status] Mark P0732R2 as only 'partial', not 'Clang 12', as some of the changes were reverted. (cherry picked from commit 727fc31a9898dfb89610ca1bc05ff86204a77177) --- clang/www/cxx_status.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 685f32dbe0d3..fc3340ec9d96 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1005,7 +1005,7 @@

C++20 implementation status

Class types as non-type template parameters P0732R2 - Clang 12 + Partial P1907R1 From 8d22f25d155113f9cfdf3952dc49088c820f2a77 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 27 Jan 2021 16:28:04 -0800 Subject: [PATCH 007/244] [llvm-c] Move LLVMX86_AMXTypeKind & LLVMPoisonValueValueKind to the bottom to avoid value changes compared with LLVM<=11 Fixes PR48905 (cherry picked from commit 6612c2bb68becda5504099b48082c844503c6d4c) --- llvm/include/llvm-c/Core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 8274213aa839..a78df16ca404 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -160,10 +160,10 @@ typedef enum { LLVMVectorTypeKind, /**< Fixed width SIMD vector type */ LLVMMetadataTypeKind, /**< Metadata */ LLVMX86_MMXTypeKind, /**< X86 MMX */ - LLVMX86_AMXTypeKind, /**< X86 AMX */ LLVMTokenTypeKind, /**< Tokens */ LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */ - LLVMBFloatTypeKind /**< 16 bit brain floating point type */ + LLVMBFloatTypeKind, /**< 16 bit brain floating point type */ + LLVMX86_AMXTypeKind /**< X86 AMX */ } LLVMTypeKind; typedef enum { @@ -270,7 +270,6 @@ typedef enum { LLVMConstantVectorValueKind, LLVMUndefValueValueKind, - LLVMPoisonValueValueKind, LLVMConstantAggregateZeroValueKind, LLVMConstantDataArrayValueKind, LLVMConstantDataVectorValueKind, @@ -283,6 +282,7 @@ typedef enum { LLVMInlineAsmValueKind, LLVMInstructionValueKind, + LLVMPoisonValueValueKind } LLVMValueKind; typedef enum { From 8364f5369eeeb2da8db2bae7716c549930d8df93 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 27 Jan 2021 10:59:28 -0800 Subject: [PATCH 008/244] Revert "Suppress non-conforming GNU paste extension in all standard-conforming modes" This reverts commit f4537935dcdbf390c863591cf556e76c3abab9c1. This reverts commit b43c26d036dcbf7a6881f39e4434cf059364022a. This GNU and MSVC extension turns out to be very popular. 
Most projects are not using C++20, so cannot use the new __VA_OPT__ feature to be standards conformant. The other workaround, using -std=gnu*, enables too many language extensions and isn't viable. Until there is a way for users to get the behavior provided by the `, ## __VA_ARGS__` extension in the -std=c++17 and earlier language modes, we need to revert this. (cherry picked from commit 61a66e4b5ec18e9e73c2f6334f6b7f7dd4bca77e) --- clang/lib/Lex/TokenLexer.cpp | 10 +++++----- clang/test/Preprocessor/macro_fn_comma_swallow2.c | 5 ----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 97cb2cf0bb8c..da5681aaf478 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -148,12 +148,12 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs( return false; // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if - // __VA_ARGS__ is empty, but not in strict mode where there are no - // named arguments, where it remains. With GNU extensions, it is removed - // regardless of named arguments. + // __VA_ARGS__ is empty, but not in strict C99 mode where there are no + // named arguments, where it remains. In all other modes, including C99 + // with GNU extensions, it is removed regardless of named arguments. // Microsoft also appears to support this extension, unofficially. - if (!PP.getLangOpts().GNUMode && !PP.getLangOpts().MSVCCompat && - Macro->getNumParams() < 2) + if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode + && Macro->getNumParams() < 2) return false; // Is a comma available to be removed? diff --git a/clang/test/Preprocessor/macro_fn_comma_swallow2.c b/clang/test/Preprocessor/macro_fn_comma_swallow2.c index 4e4960ca7f18..93ab2b83664a 100644 --- a/clang/test/Preprocessor/macro_fn_comma_swallow2.c +++ b/clang/test/Preprocessor/macro_fn_comma_swallow2.c @@ -1,16 +1,11 @@ // Test the __VA_ARGS__ comma swallowing extensions of various compiler dialects. 
// RUN: %clang_cc1 -E %s | FileCheck -check-prefix=GCC -strict-whitespace %s -// RUN: %clang_cc1 -E -std=c90 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -std=c99 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -std=c11 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -x c++ %s | FileCheck -check-prefix=GCC -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -std=c++03 %s | FileCheck -check-prefix=C99 -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -std=c++11 %s | FileCheck -check-prefix=C99 -strict-whitespace %s // RUN: %clang_cc1 -E -std=gnu99 %s | FileCheck -check-prefix=GCC -strict-whitespace %s // RUN: %clang_cc1 -E -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s -// RUN: %clang_cc1 -E -x c++ -std=c++11 -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s // RUN: %clang_cc1 -E -DNAMED %s | FileCheck -check-prefix=GCC -strict-whitespace %s // RUN: %clang_cc1 -E -std=c99 -DNAMED %s | FileCheck -check-prefix=C99 -strict-whitespace %s From b0085d205b3063c332a080599830ef0500cb6924 Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Mon, 7 Dec 2020 10:26:49 -0500 Subject: [PATCH 009/244] Itanium Mangling: Mangle `__alignof__` differently than `alignof`. The two operations have acted differently since Clang 8, but were unfortunately mangled the same. The new mangling uses new "vendor extended expression" syntax proposed in https://github.com/itanium-cxx-abi/cxx-abi/issues/112 GCC had the same mangling problem, https://gcc.gnu.org/PR88115, and will hopefully be switching to the same mangling as implemented here. Additionally, fix the mangling of `__uuidof` to use the new extension syntax, instead of its previous nonstandard special-case. Adjusts the demangler accordingly. 
Differential Revision: https://reviews.llvm.org/D93922 (cherry picked from commit 9c7aeaebb3ac1b94200b59b111742cb6b8f090c2) --- clang/lib/AST/ItaniumMangle.cpp | 103 ++++++++++++------ clang/test/CodeGenCXX/mangle-alignof.cpp | 25 +++++ .../CodeGenCXX/microsoft-uuidof-mangling.cpp | 44 +++++--- libcxxabi/src/demangle/ItaniumDemangle.h | 68 ++++++------ libcxxabi/test/test_demangle.pass.cpp | 14 ++- llvm/include/llvm/Demangle/ItaniumDemangle.h | 68 ++++++------ 6 files changed, 211 insertions(+), 111 deletions(-) create mode 100644 clang/test/CodeGenCXX/mangle-alignof.cpp diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 6c8d5687c64a..668733a4be34 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -558,6 +558,7 @@ class CXXNameMangler { unsigned NumTemplateArgs); void mangleTemplateArgs(TemplateName TN, const TemplateArgumentList &AL); void mangleTemplateArg(TemplateArgument A, bool NeedExactType); + void mangleTemplateArgExpr(const Expr *E); void mangleValueInTemplateArg(QualType T, const APValue &V, bool TopLevel, bool NeedExactType = false); @@ -3528,8 +3529,8 @@ void CXXNameMangler::mangleType(const DependentSizedMatrixType *T) { Out << "u" << VendorQualifier.size() << VendorQualifier; Out << "I"; - mangleTemplateArg(T->getRowExpr(), false); - mangleTemplateArg(T->getColumnExpr(), false); + mangleTemplateArgExpr(T->getRowExpr()); + mangleTemplateArgExpr(T->getColumnExpr()); mangleType(T->getElementType()); Out << "E"; } @@ -3916,6 +3917,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // ::= ds # expr.*expr // ::= sZ # size of a parameter pack // ::= sZ # size of a function parameter pack + // ::= u * E # vendor extended expression // ::= // ::= L E # integer literal // ::= L E # floating literal @@ -4007,14 +4009,26 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { case Expr::CXXUuidofExprClass: { const CXXUuidofExpr *UE = cast(E); - if 
(UE->isTypeOperand()) { - QualType UuidT = UE->getTypeOperand(Context.getASTContext()); - Out << "u8__uuidoft"; - mangleType(UuidT); + // As of clang 12, uuidof uses the vendor extended expression + // mangling. Previously, it used a special-cased nonstandard extension. + if (Context.getASTContext().getLangOpts().getClangABICompat() > + LangOptions::ClangABI::Ver11) { + Out << "u8__uuidof"; + if (UE->isTypeOperand()) + mangleType(UE->getTypeOperand(Context.getASTContext())); + else + mangleTemplateArgExpr(UE->getExprOperand()); + Out << 'E'; } else { - Expr *UuidExp = UE->getExprOperand(); - Out << "u8__uuidofz"; - mangleExpression(UuidExp, Arity); + if (UE->isTypeOperand()) { + QualType UuidT = UE->getTypeOperand(Context.getASTContext()); + Out << "u8__uuidoft"; + mangleType(UuidT); + } else { + Expr *UuidExp = UE->getExprOperand(); + Out << "u8__uuidofz"; + mangleExpression(UuidExp, Arity); + } } break; } @@ -4312,13 +4326,39 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; } + auto MangleAlignofSizeofArg = [&] { + if (SAE->isArgumentType()) { + Out << 't'; + mangleType(SAE->getArgumentType()); + } else { + Out << 'z'; + mangleExpression(SAE->getArgumentExpr()); + } + }; + switch(SAE->getKind()) { case UETT_SizeOf: Out << 's'; + MangleAlignofSizeofArg(); break; case UETT_PreferredAlignOf: + // As of clang 12, we mangle __alignof__ differently than alignof. (They + // have acted differently since Clang 8, but were previously mangled the + // same.) 
+ if (Context.getASTContext().getLangOpts().getClangABICompat() > + LangOptions::ClangABI::Ver11) { + Out << "u11__alignof__"; + if (SAE->isArgumentType()) + mangleType(SAE->getArgumentType()); + else + mangleTemplateArgExpr(SAE->getArgumentExpr()); + Out << 'E'; + break; + } + LLVM_FALLTHROUGH; case UETT_AlignOf: Out << 'a'; + MangleAlignofSizeofArg(); break; case UETT_VecStep: { DiagnosticsEngine &Diags = Context.getDiags(); @@ -4336,13 +4376,6 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { return; } } - if (SAE->isArgumentType()) { - Out << 't'; - mangleType(SAE->getArgumentType()); - } else { - Out << 'z'; - mangleExpression(SAE->getArgumentExpr()); - } break; } @@ -4971,23 +5004,7 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { mangleType(A.getAsTemplateOrTemplatePattern()); break; case TemplateArgument::Expression: { - // It's possible to end up with a DeclRefExpr here in certain - // dependent cases, in which case we should mangle as a - // declaration. - const Expr *E = A.getAsExpr()->IgnoreParenImpCasts(); - if (const DeclRefExpr *DRE = dyn_cast(E)) { - const ValueDecl *D = DRE->getDecl(); - if (isa(D) || isa(D)) { - Out << 'L'; - mangle(D); - Out << 'E'; - break; - } - } - - Out << 'X'; - mangleExpression(E); - Out << 'E'; + mangleTemplateArgExpr(A.getAsExpr()); break; } case TemplateArgument::Integral: @@ -5044,6 +5061,26 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { } } +void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) { + // It's possible to end up with a DeclRefExpr here in certain + // dependent cases, in which case we should mangle as a + // declaration. 
+ E = E->IgnoreParenImpCasts(); + if (const DeclRefExpr *DRE = dyn_cast(E)) { + const ValueDecl *D = DRE->getDecl(); + if (isa(D) || isa(D)) { + Out << 'L'; + mangle(D); + Out << 'E'; + return; + } + } + + Out << 'X'; + mangleExpression(E); + Out << 'E'; +} + /// Determine whether a given value is equivalent to zero-initialization for /// the purpose of discarding a trailing portion of a 'tl' mangling. /// diff --git a/clang/test/CodeGenCXX/mangle-alignof.cpp b/clang/test/CodeGenCXX/mangle-alignof.cpp new file mode 100644 index 000000000000..0a65c7e87a2d --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-alignof.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -std=c++11 -Wno-gnu-alignof-expression -emit-llvm %s -o - -triple=%itanium_abi_triple | FileCheck %s --check-prefix=CHECK-NEW +// RUN: %clang_cc1 -std=c++11 -Wno-gnu-alignof-expression -emit-llvm %s -o - -triple=%itanium_abi_triple -fclang-abi-compat=11 | FileCheck %s --check-prefix=CHECK-OLD + +// Verify the difference in mangling for alignof and __alignof__ in a new ABI +// compat mode. 
+ +template void f1(decltype(alignof(T))) {} +template void f1(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f1IiEvDTatT_E +// CHECK-NEW: void @_Z2f1IiEvDTatT_E + +template void f2(decltype(__alignof__(T))) {} +template void f2(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f2IiEvDTatT_E +// CHECK-NEW: void @_Z2f2IiEvDTu11__alignof__T_E + +template void f3(decltype(alignof(T(0)))) {} +template void f3(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f3IiEvDTazcvT_Li0EE +// CHECK-NEW: void @_Z2f3IiEvDTazcvT_Li0EE + +template void f4(decltype(__alignof__(T(0)))) {} +template void f4(__SIZE_TYPE__); +// CHECK-OLD: void @_Z2f4IiEvDTazcvT_Li0EE +// CHECK-NEW: void @_Z2f4IiEvDTu11__alignof__XcvT_Li0EEEE diff --git a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp index ec26be292acc..321f65cacc71 100644 --- a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp +++ b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions | FileCheck %s --check-prefixes=CHECK,CHECK-V12 +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions -fclang-abi-compat=11 | FileCheck %s --check-prefixes=CHECK,CHECK-V11 // rdar://17784718 typedef struct _GUID @@ -24,11 +25,16 @@ struct __declspec(uuid("EAFA1952-66F8-438B-8FBA-AF1BBAE42191")) TestStruct struct __declspec(uuid("EAFA1952-66F8-438B-8FBA-AF1BBAE42191")) OtherStruct {}; -template void test_uuidofType(void *arg[sizeof(__uuidof(T))] = 0) {} +template void test_uuidofType(decltype(__uuidof(T)) arg) {} -template void test_uuidofExpr(void *arg[sizeof(__uuidof(typename T::member))] = 0) {} +template void test_uuidofExpr(decltype(__uuidof(T::member)) arg) {} -struct HasMember { typedef TestStruct member; }; +struct HasMember { + TestStruct member; +}; + +// Ensure that mangling an 
"expr-primary" argument is handled properly. +template void test_uuidofExpr2(decltype(T{}, __uuidof(HasMember::member)) arg) {} template struct UUIDTestTwo { UUIDTestTwo(); }; @@ -39,19 +45,29 @@ int main(int argc, const char * argv[]) // type had better not mention TestStruct or OtherStruct! UUIDTestTwo<__uuidof(TestStruct)> uuidof_test2; UUIDTestTwo<__uuidof(OtherStruct)> uuidof_test3; - test_uuidofType(); - test_uuidofExpr(); + test_uuidofType(GUID{}); + test_uuidofExpr(GUID{}); + test_uuidofExpr2(GUID{}); return 0; } // CHECK: define{{.*}} i32 @main -// CHECK: call void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: call void @_Z15test_uuidofTypeI10TestStructEvPPv(i8** null) -// CHECK: call void @_Z15test_uuidofExprI9HasMemberEvPPv(i8** null) - +// CHECK: call void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev( +// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev( +// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev( +// CHECK-V11: call void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E( +// CHECK-V12: call void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE( +// CHECK-V11: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( +// CHECK-V12: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( +// CHECK-V11: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( +// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( +// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ // CHECK: define linkonce_odr void 
@_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev -// CHECK: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvPPv -// CHECK: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvPPv +// CHECK-V11: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E( +// CHECK-V12: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE( +// CHECK-V11: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( +// CHECK-V12: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( +// CHECK-V11: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( +// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( +// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC2Ev diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 6bfc02d15379..e5fca98f9271 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -96,7 +96,6 @@ X(InitListExpr) \ X(FoldExpr) \ X(ThrowExpr) \ - X(UUIDOfExpr) \ X(BoolExpr) \ X(StringLiteral) \ X(LambdaExpr) \ @@ -2035,21 +2034,6 @@ class ThrowExpr : public Node { } }; -// MSVC __uuidof extension, generated by clang in -fms-extensions mode. 
-class UUIDOfExpr : public Node { - Node *Operand; -public: - UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {} - - template void match(Fn F) const { F(Operand); } - - void printLeft(OutputStream &S) const override { - S << "__uuidof("; - Operand->print(S); - S << ")"; - } -}; - class BoolExpr : public Node { bool Value; @@ -5013,6 +4997,43 @@ Node *AbstractManglingParser::parseExpr() { } } return nullptr; + case 'u': { + ++First; + Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr); + if (!Name) + return nullptr; + // Special case legacy __uuidof mangling. The 't' and 'z' appear where the + // standard encoding expects a , and would be otherwise be + // interpreted as node 'short' or 'ellipsis'. However, neither + // __uuidof(short) nor __uuidof(...) can actually appear, so there is no + // actual conflict here. + if (Name->getBaseName() == "__uuidof") { + if (numLeft() < 2) + return nullptr; + if (*First == 't') { + ++First; + Node *Ty = getDerived().parseType(); + if (!Ty) + return nullptr; + return make(Name, makeNodeArray(&Ty, &Ty + 1)); + } + if (*First == 'z') { + ++First; + Node *Ex = getDerived().parseExpr(); + if (!Ex) + return nullptr; + return make(Name, makeNodeArray(&Ex, &Ex + 1)); + } + } + size_t ExprsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseTemplateArg(); + if (E == nullptr) + return E; + Names.push_back(E); + } + return make(Name, popTrailingNodeArray(ExprsBegin)); + } case '1': case '2': case '3': @@ -5024,21 +5045,6 @@ Node *AbstractManglingParser::parseExpr() { case '9': return getDerived().parseUnresolvedName(); } - - if (consumeIf("u8__uuidoft")) { - Node *Ty = getDerived().parseType(); - if (!Ty) - return nullptr; - return make(Ty); - } - - if (consumeIf("u8__uuidofz")) { - Node *Ex = getDerived().parseExpr(); - if (!Ex) - return nullptr; - return make(Ex); - } - return nullptr; } diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp 
index 3954fdba048e..512cc3928fdd 100644 --- a/libcxxabi/test/test_demangle.pass.cpp +++ b/libcxxabi/test/test_demangle.pass.cpp @@ -29776,8 +29776,18 @@ const char* cases[][2] = // Vendor extension types are substitution candidates. {"_Z1fu3fooS_", "f(foo, foo)"}, - {"_ZN3FooIXu8__uuidofzdeL_Z3sucEEEC1Ev", "Foo<__uuidof(*(suc))>::Foo()"}, - {"_ZN3FooIXu8__uuidoft13SomeUUIDClassEEC1Ev", "Foo<__uuidof(SomeUUIDClass)>::Foo()"}, + // alignof with type and expression, and __alignof__ with the same. + {"_Z2f1IiEvDTatT_E", "void f1(decltype(alignof (int)))"}, + {"_Z2f3IiEvDTazcvT_Li0EE", "void f3(decltype(alignof ((int)(0))))"}, + {"_Z2f2IiEvDTu11__alignof__T_EE", "void f2(decltype(__alignof__(int)))"}, + {"_Z2f4IiEvDTu11__alignof__XcvT_Li0EEEE", "void f4(decltype(__alignof__((int)(0))))"}, + + // Legacy nonstandard mangling for __uuidof. + {"_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E", "void test_uuidofType(decltype(__uuidof(TestStruct)))"}, + {"_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE", "void test_uuidofExpr(decltype(__uuidof(HasMember::member)))"}, + // Current __uuidof mangling using vendor extended expression. + {"_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE", "void test_uuidofType(decltype(__uuidof(TestStruct)))"}, + {"_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE", "void test_uuidofExpr(decltype(__uuidof(HasMember::member)))"}, // C++2a char8_t: {"_ZTSPDu", "typeinfo name for char8_t*"}, diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 6bfc02d15379..e5fca98f9271 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -96,7 +96,6 @@ X(InitListExpr) \ X(FoldExpr) \ X(ThrowExpr) \ - X(UUIDOfExpr) \ X(BoolExpr) \ X(StringLiteral) \ X(LambdaExpr) \ @@ -2035,21 +2034,6 @@ class ThrowExpr : public Node { } }; -// MSVC __uuidof extension, generated by clang in -fms-extensions mode. 
-class UUIDOfExpr : public Node { - Node *Operand; -public: - UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {} - - template void match(Fn F) const { F(Operand); } - - void printLeft(OutputStream &S) const override { - S << "__uuidof("; - Operand->print(S); - S << ")"; - } -}; - class BoolExpr : public Node { bool Value; @@ -5013,6 +4997,43 @@ Node *AbstractManglingParser::parseExpr() { } } return nullptr; + case 'u': { + ++First; + Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr); + if (!Name) + return nullptr; + // Special case legacy __uuidof mangling. The 't' and 'z' appear where the + // standard encoding expects a , and would be otherwise be + // interpreted as node 'short' or 'ellipsis'. However, neither + // __uuidof(short) nor __uuidof(...) can actually appear, so there is no + // actual conflict here. + if (Name->getBaseName() == "__uuidof") { + if (numLeft() < 2) + return nullptr; + if (*First == 't') { + ++First; + Node *Ty = getDerived().parseType(); + if (!Ty) + return nullptr; + return make(Name, makeNodeArray(&Ty, &Ty + 1)); + } + if (*First == 'z') { + ++First; + Node *Ex = getDerived().parseExpr(); + if (!Ex) + return nullptr; + return make(Name, makeNodeArray(&Ex, &Ex + 1)); + } + } + size_t ExprsBegin = Names.size(); + while (!consumeIf('E')) { + Node *E = getDerived().parseTemplateArg(); + if (E == nullptr) + return E; + Names.push_back(E); + } + return make(Name, popTrailingNodeArray(ExprsBegin)); + } case '1': case '2': case '3': @@ -5024,21 +5045,6 @@ Node *AbstractManglingParser::parseExpr() { case '9': return getDerived().parseUnresolvedName(); } - - if (consumeIf("u8__uuidoft")) { - Node *Ty = getDerived().parseType(); - if (!Ty) - return nullptr; - return make(Ty); - } - - if (consumeIf("u8__uuidofz")) { - Node *Ex = getDerived().parseExpr(); - if (!Ex) - return nullptr; - return make(Ex); - } - return nullptr; } From 7da92afbf08e90960f7e5dee00bbf6ef8f323a5c Mon Sep 17 00:00:00 2001 From: James Y Knight 
Date: Sun, 24 Jan 2021 15:50:15 -0500 Subject: [PATCH 010/244] Itanium Mangling: Fix handling of <expr-primary> in <template-arg>. Previously, we were emitting an extraneous X .. E in <template-arg> around an <expr-primary> if the template argument was constructed from an expression (rather than an already-evaluated literal value). In such a case, we would then e.g. emit 'XLi0EE' instead of 'Li0E'. We had one special-case for DeclRefExpr expressions, in particular, to emit them as the mangled name without the surrounding X/E. However, unfortunately, that special case also triggered for ParmVarDecl (a subtype of VarDecl), and _incorrectly_ emitted 'L_Z .. E' instead of the proper 'Xfp_E'. This change causes mangleExpression itself to be responsible for emitting X/E around non-primary expressions, which removes the special-case, and corrects both these problems. Differential Revision: https://reviews.llvm.org/D95487 (cherry picked from commit 8ca33605ff0cfc536f5c6710fb5f6378bf11959a) --- clang/lib/AST/ItaniumMangle.cpp | 223 +++++++++++++----- clang/test/CodeGenCXX/clang-abi-compat.cpp | 94 +++++++- clang/test/CodeGenCXX/mangle-abi-tag.cpp | 2 +- clang/test/CodeGenCXX/mangle-concept.cpp | 4 +- clang/test/CodeGenCXX/mangle-template.cpp | 4 +- clang/test/CodeGenCXX/mangle.cpp | 2 +- clang/test/CodeGenCXX/matrix-type.cpp | 16 +- .../CodeGenCXX/microsoft-uuidof-mangling.cpp | 6 +- 8 files changed, 259 insertions(+), 92 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 668733a4be34..54e2f361a9f1 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -546,8 +546,8 @@ class CXXNameMangler { unsigned knownArity); void mangleCastExpression(const Expr *E, StringRef CastEncoding); void mangleInitListElements(const InitListExpr *InitList); - void mangleDeclRefExpr(const NamedDecl *D); - void mangleExpression(const Expr *E, unsigned Arity = UnknownArity); + void mangleExpression(const Expr *E, unsigned Arity = UnknownArity, + bool AsTemplateArg = false); void
mangleCXXCtorType(CXXCtorType T, const CXXRecordDecl *InheritedFrom); void mangleCXXDtorType(CXXDtorType T); @@ -3872,33 +3872,8 @@ void CXXNameMangler::mangleInitListElements(const InitListExpr *InitList) { mangleExpression(InitList->getInit(i)); } -void CXXNameMangler::mangleDeclRefExpr(const NamedDecl *D) { - switch (D->getKind()) { - default: - // ::= L E # external name - Out << 'L'; - mangle(D); - Out << 'E'; - break; - - case Decl::ParmVar: - mangleFunctionParam(cast(D)); - break; - - case Decl::EnumConstant: { - const EnumConstantDecl *ED = cast(D); - mangleIntegerLiteral(ED->getType(), ED->getInitVal()); - break; - } - - case Decl::NonTypeTemplateParm: - const NonTypeTemplateParmDecl *PD = cast(D); - mangleTemplateParameter(PD->getDepth(), PD->getIndex()); - break; - } -} - -void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { +void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, + bool AsTemplateArg) { // ::= // ::= // ::= @@ -3912,6 +3887,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // ::= at # alignof (a type) // ::= // ::= + // ::= fpT # 'this' expression (part of ) // ::= sr # dependent name // ::= sr # dependent template-id // ::= ds # expr.*expr @@ -3920,11 +3896,55 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // ::= u * E # vendor extended expression // ::= // ::= L E # integer literal - // ::= L E # floating literal + // ::= L E # floating literal + // ::= L E # string literal + // ::= L E # nullptr literal "LDnE" + // ::= L 0 E # null pointer template argument + // ::= L _ E # complex floating point literal (C99); not used by clang // ::= L E # external name - // ::= fpT # 'this' expression QualType ImplicitlyConvertedToType; + // A top-level expression that's not needs to be wrapped in + // X...E in a template arg. 
+ bool IsPrimaryExpr = true; + auto NotPrimaryExpr = [&] { + if (AsTemplateArg && IsPrimaryExpr) + Out << 'X'; + IsPrimaryExpr = false; + }; + + auto MangleDeclRefExpr = [&](const NamedDecl *D) { + switch (D->getKind()) { + default: + // ::= L E # external name + Out << 'L'; + mangle(D); + Out << 'E'; + break; + + case Decl::ParmVar: + NotPrimaryExpr(); + mangleFunctionParam(cast(D)); + break; + + case Decl::EnumConstant: { + // + const EnumConstantDecl *ED = cast(D); + mangleIntegerLiteral(ED->getType(), ED->getInitVal()); + break; + } + + case Decl::NonTypeTemplateParm: + NotPrimaryExpr(); + const NonTypeTemplateParmDecl *PD = cast(D); + mangleTemplateParameter(PD->getDepth(), PD->getIndex()); + break; + } + }; + + // 'goto recurse' is used when handling a simple "unwrapping" node which + // produces no output, where ImplicitlyConvertedToType and AsTemplateArg need + // to be preserved. recurse: switch (E->getStmtClass()) { case Expr::NoStmtClass: @@ -3996,6 +4016,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { case Expr::SourceLocExprClass: case Expr::BuiltinBitCastExprClass: { + NotPrimaryExpr(); if (!NullOut) { // As bad as this diagnostic is, it's better than crashing. DiagnosticsEngine &Diags = Context.getDiags(); @@ -4003,11 +4024,13 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { "cannot yet mangle expression type %0"); Diags.Report(E->getExprLoc(), DiagID) << E->getStmtClassName() << E->getSourceRange(); + return; } break; } case Expr::CXXUuidofExprClass: { + NotPrimaryExpr(); const CXXUuidofExpr *UE = cast(E); // As of clang 12, uuidof uses the vendor extended expression // mangling. Previously, it used a special-cased nonstandard extension. 
@@ -4027,7 +4050,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } else { Expr *UuidExp = UE->getExprOperand(); Out << "u8__uuidofz"; - mangleExpression(UuidExp, Arity); + mangleExpression(UuidExp); } } break; @@ -4035,13 +4058,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // Even gcc-4.5 doesn't mangle this. case Expr::BinaryConditionalOperatorClass: { + NotPrimaryExpr(); DiagnosticsEngine &Diags = Context.getDiags(); unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "?: operator with omitted middle operand cannot be mangled"); Diags.Report(E->getExprLoc(), DiagID) << E->getStmtClassName() << E->getSourceRange(); - break; + return; } // These are used for internal purposes and cannot be meaningfully mangled. @@ -4049,6 +4073,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { llvm_unreachable("cannot mangle opaque value; mangling wrong thing?"); case Expr::InitListExprClass: { + NotPrimaryExpr(); Out << "il"; mangleInitListElements(cast(E)); Out << "E"; @@ -4056,6 +4081,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::DesignatedInitExprClass: { + NotPrimaryExpr(); auto *DIE = cast(E); for (const auto &Designator : DIE->designators()) { if (Designator.isFieldDesignator()) { @@ -4077,27 +4103,27 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXDefaultArgExprClass: - mangleExpression(cast(E)->getExpr(), Arity); - break; + E = cast(E)->getExpr(); + goto recurse; case Expr::CXXDefaultInitExprClass: - mangleExpression(cast(E)->getExpr(), Arity); - break; + E = cast(E)->getExpr(); + goto recurse; case Expr::CXXStdInitializerListExprClass: - mangleExpression(cast(E)->getSubExpr(), Arity); - break; + E = cast(E)->getSubExpr(); + goto recurse; case Expr::SubstNonTypeTemplateParmExprClass: - mangleExpression(cast(E)->getReplacement(), - Arity); - break; + E = cast(E)->getReplacement(); + goto 
recurse; case Expr::UserDefinedLiteralClass: // We follow g++'s approach of mangling a UDL as a call to the literal // operator. case Expr::CXXMemberCallExprClass: // fallthrough case Expr::CallExprClass: { + NotPrimaryExpr(); const CallExpr *CE = cast(E); // ::= cp * E @@ -4128,6 +4154,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXNewExprClass: { + NotPrimaryExpr(); const CXXNewExpr *New = cast(E); if (New->isGlobalNew()) Out << "gs"; Out << (New->isArray() ? "na" : "nw"); @@ -4163,6 +4190,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXPseudoDestructorExprClass: { + NotPrimaryExpr(); const auto *PDE = cast(E); if (const Expr *Base = PDE->getBase()) mangleMemberExprBase(Base, PDE->isArrow()); @@ -4189,6 +4217,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::MemberExprClass: { + NotPrimaryExpr(); const MemberExpr *ME = cast(E); mangleMemberExpr(ME->getBase(), ME->isArrow(), ME->getQualifier(), nullptr, @@ -4199,6 +4228,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::UnresolvedMemberExprClass: { + NotPrimaryExpr(); const UnresolvedMemberExpr *ME = cast(E); mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(), ME->isArrow(), ME->getQualifier(), nullptr, @@ -4209,6 +4239,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXDependentScopeMemberExprClass: { + NotPrimaryExpr(); const CXXDependentScopeMemberExpr *ME = cast(E); mangleMemberExpr(ME->isImplicitAccess() ? 
nullptr : ME->getBase(), @@ -4221,6 +4252,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::UnresolvedLookupExprClass: { + NotPrimaryExpr(); const UnresolvedLookupExpr *ULE = cast(E); mangleUnresolvedName(ULE->getQualifier(), ULE->getName(), ULE->getTemplateArgs(), ULE->getNumTemplateArgs(), @@ -4229,6 +4261,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXUnresolvedConstructExprClass: { + NotPrimaryExpr(); const CXXUnresolvedConstructExpr *CE = cast(E); unsigned N = CE->getNumArgs(); @@ -4239,7 +4272,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { mangleType(CE->getType()); mangleInitListElements(IL); Out << "E"; - return; + break; } Out << "cv"; @@ -4251,14 +4284,17 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXConstructExprClass: { + // An implicit cast is silent, thus may contain . const auto *CE = cast(E); if (!CE->isListInitialization() || CE->isStdInitListInitialization()) { assert( CE->getNumArgs() >= 1 && (CE->getNumArgs() == 1 || isa(CE->getArg(1))) && "implicit CXXConstructExpr must have one argument"); - return mangleExpression(cast(E)->getArg(0)); + E = cast(E)->getArg(0); + goto recurse; } + NotPrimaryExpr(); Out << "il"; for (auto *E : CE->arguments()) mangleExpression(E); @@ -4267,6 +4303,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXTemporaryObjectExprClass: { + NotPrimaryExpr(); const auto *CE = cast(E); unsigned N = CE->getNumArgs(); bool List = CE->isListInitialization(); @@ -4296,17 +4333,20 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXScalarValueInitExprClass: + NotPrimaryExpr(); Out << "cv"; mangleType(E->getType()); Out << "_E"; break; case Expr::CXXNoexceptExprClass: + NotPrimaryExpr(); Out << "nx"; mangleExpression(cast(E)->getOperand()); break; case Expr::UnaryExprOrTypeTraitExprClass: { + // 
Non-instantiation-dependent traits are an integer literal. const UnaryExprOrTypeTraitExpr *SAE = cast(E); if (!SAE->isInstantiationDependent()) { @@ -4326,6 +4366,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; } + NotPrimaryExpr(); // But otherwise, they are not. + auto MangleAlignofSizeofArg = [&] { if (SAE->isArgumentType()) { Out << 't'; @@ -4380,6 +4422,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXThrowExprClass: { + NotPrimaryExpr(); const CXXThrowExpr *TE = cast(E); // ::= tw # throw expression // ::= tr # rethrow @@ -4393,6 +4436,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXTypeidExprClass: { + NotPrimaryExpr(); const CXXTypeidExpr *TIE = cast(E); // ::= ti # typeid (type) // ::= te # typeid (expression) @@ -4407,6 +4451,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXDeleteExprClass: { + NotPrimaryExpr(); const CXXDeleteExpr *DE = cast(E); // ::= [gs] dl # [::] delete expr // ::= [gs] da # [::] delete [] expr @@ -4417,6 +4462,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::UnaryOperatorClass: { + NotPrimaryExpr(); const UnaryOperator *UO = cast(E); mangleOperatorName(UnaryOperator::getOverloadedOperator(UO->getOpcode()), /*Arity=*/1); @@ -4425,6 +4471,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ArraySubscriptExprClass: { + NotPrimaryExpr(); const ArraySubscriptExpr *AE = cast(E); // Array subscript is treated as a syntactically weird form of @@ -4436,6 +4483,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::MatrixSubscriptExprClass: { + NotPrimaryExpr(); const MatrixSubscriptExpr *ME = cast(E); Out << "ixix"; mangleExpression(ME->getBase()); @@ -4446,6 +4494,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { case 
Expr::CompoundAssignOperatorClass: // fallthrough case Expr::BinaryOperatorClass: { + NotPrimaryExpr(); const BinaryOperator *BO = cast(E); if (BO->getOpcode() == BO_PtrMemD) Out << "ds"; @@ -4458,6 +4507,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXRewrittenBinaryOperatorClass: { + NotPrimaryExpr(); // The mangled form represents the original syntax. CXXRewrittenBinaryOperator::DecomposedForm Decomposed = cast(E)->getDecomposedForm(); @@ -4469,6 +4519,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ConditionalOperatorClass: { + NotPrimaryExpr(); const ConditionalOperator *CO = cast(E); mangleOperatorName(OO_Conditional, /*Arity=*/3); mangleExpression(CO->getCond()); @@ -4484,19 +4535,22 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ObjCBridgedCastExprClass: { + NotPrimaryExpr(); // Mangle ownership casts as a vendor extended operator __bridge, // __bridge_transfer, or __bridge_retain. StringRef Kind = cast(E)->getBridgeKindName(); Out << "v1U" << Kind.size() << Kind; + mangleCastExpression(E, "cv"); + break; } - // Fall through to mangle the cast itself. - LLVM_FALLTHROUGH; case Expr::CStyleCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "cv"); break; case Expr::CXXFunctionalCastExprClass: { + NotPrimaryExpr(); auto *Sub = cast(E)->getSubExpr()->IgnoreImplicit(); // FIXME: Add isImplicit to CXXConstructExpr. 
if (auto *CCE = dyn_cast(Sub)) @@ -4516,22 +4570,28 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXStaticCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "sc"); break; case Expr::CXXDynamicCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "dc"); break; case Expr::CXXReinterpretCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "rc"); break; case Expr::CXXConstCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "cc"); break; case Expr::CXXAddrspaceCastExprClass: + NotPrimaryExpr(); mangleCastExpression(E, "ac"); break; case Expr::CXXOperatorCallExprClass: { + NotPrimaryExpr(); const CXXOperatorCallExpr *CE = cast(E); unsigned NumArgs = CE->getNumArgs(); // A CXXOperatorCallExpr for OO_Arrow models only semantics, not syntax @@ -4545,9 +4605,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ParenExprClass: - mangleExpression(cast(E)->getSubExpr(), Arity); - break; - + E = cast(E)->getSubExpr(); + goto recurse; case Expr::ConceptSpecializationExprClass: { // ::= L E # external name @@ -4561,10 +4620,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::DeclRefExprClass: - mangleDeclRefExpr(cast(E)->getDecl()); + // MangleDeclRefExpr helper handles primary-vs-nonprimary + MangleDeclRefExpr(cast(E)->getDecl()); break; case Expr::SubstNonTypeTemplateParmPackExprClass: + NotPrimaryExpr(); // FIXME: not clear how to mangle this! // template class A { // template void foo(U (&x)[N]...); @@ -4573,14 +4634,16 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; case Expr::FunctionParmPackExprClass: { + NotPrimaryExpr(); // FIXME: not clear how to mangle this! 
const FunctionParmPackExpr *FPPE = cast(E); Out << "v110_SUBSTPACK"; - mangleDeclRefExpr(FPPE->getParameterPack()); + MangleDeclRefExpr(FPPE->getParameterPack()); break; } case Expr::DependentScopeDeclRefExprClass: { + NotPrimaryExpr(); const DependentScopeDeclRefExpr *DRE = cast(E); mangleUnresolvedName(DRE->getQualifier(), DRE->getDeclName(), DRE->getTemplateArgs(), DRE->getNumTemplateArgs(), @@ -4589,24 +4652,27 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXBindTemporaryExprClass: - mangleExpression(cast(E)->getSubExpr()); - break; + E = cast(E)->getSubExpr(); + goto recurse; case Expr::ExprWithCleanupsClass: - mangleExpression(cast(E)->getSubExpr(), Arity); - break; + E = cast(E)->getSubExpr(); + goto recurse; case Expr::FloatingLiteralClass: { + // const FloatingLiteral *FL = cast(E); mangleFloatLiteral(FL->getType(), FL->getValue()); break; } case Expr::FixedPointLiteralClass: + // Currently unimplemented -- might be in future? mangleFixedPointLiteral(); break; case Expr::CharacterLiteralClass: + // Out << 'L'; mangleType(E->getType()); Out << cast(E)->getValue(); @@ -4615,18 +4681,21 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { // FIXME. __objc_yes/__objc_no are mangled same as true/false case Expr::ObjCBoolLiteralExprClass: + // Out << "Lb"; Out << (cast(E)->getValue() ? '1' : '0'); Out << 'E'; break; case Expr::CXXBoolLiteralExprClass: + // Out << "Lb"; Out << (cast(E)->getValue() ? '1' : '0'); Out << 'E'; break; case Expr::IntegerLiteralClass: { + // llvm::APSInt Value(cast(E)->getValue()); if (E->getType()->isSignedIntegerType()) Value.setIsSigned(true); @@ -4635,6 +4704,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::ImaginaryLiteralClass: { + // const ImaginaryLiteral *IE = cast(E); // Mangle as if a complex literal. // Proposal from David Vandevoorde, 2010.06.30. 
@@ -4658,6 +4728,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::StringLiteralClass: { + // // Revised proposal from David Vandervoorde, 2010.07.15. Out << 'L'; assert(isa(E->getType())); @@ -4667,21 +4738,25 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::GNUNullExprClass: + // // Mangle as if an integer literal 0. mangleIntegerLiteral(E->getType(), llvm::APSInt(32)); break; case Expr::CXXNullPtrLiteralExprClass: { + // Out << "LDnE"; break; } case Expr::PackExpansionExprClass: + NotPrimaryExpr(); Out << "sp"; mangleExpression(cast(E)->getPattern()); break; case Expr::SizeOfPackExprClass: { + NotPrimaryExpr(); auto *SPE = cast(E); if (SPE->isPartiallySubstituted()) { Out << "sP"; @@ -4706,12 +4781,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { break; } - case Expr::MaterializeTemporaryExprClass: { - mangleExpression(cast(E)->getSubExpr()); - break; - } + case Expr::MaterializeTemporaryExprClass: + E = cast(E)->getSubExpr(); + goto recurse; case Expr::CXXFoldExprClass: { + NotPrimaryExpr(); auto *FE = cast(E); if (FE->isLeftFold()) Out << (FE->getInit() ? "fL" : "fl"); @@ -4733,27 +4808,34 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { } case Expr::CXXThisExprClass: + NotPrimaryExpr(); Out << "fpT"; break; case Expr::CoawaitExprClass: // FIXME: Propose a non-vendor mangling. + NotPrimaryExpr(); Out << "v18co_await"; mangleExpression(cast(E)->getOperand()); break; case Expr::DependentCoawaitExprClass: // FIXME: Propose a non-vendor mangling. + NotPrimaryExpr(); Out << "v18co_await"; mangleExpression(cast(E)->getOperand()); break; case Expr::CoyieldExprClass: // FIXME: Propose a non-vendor mangling. + NotPrimaryExpr(); Out << "v18co_yield"; mangleExpression(cast(E)->getOperand()); break; } + + if (AsTemplateArg && !IsPrimaryExpr) + Out << 'E'; } /// Mangle an expression which refers to a parameter variable. 
@@ -5003,10 +5085,9 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { Out << "Dp"; mangleType(A.getAsTemplateOrTemplatePattern()); break; - case TemplateArgument::Expression: { + case TemplateArgument::Expression: mangleTemplateArgExpr(A.getAsExpr()); break; - } case TemplateArgument::Integral: mangleIntegerLiteral(A.getIntegralType(), A.getAsIntegral()); break; @@ -5062,9 +5143,22 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) { } void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) { - // It's possible to end up with a DeclRefExpr here in certain - // dependent cases, in which case we should mangle as a - // declaration. + ASTContext &Ctx = Context.getASTContext(); + if (Ctx.getLangOpts().getClangABICompat() > LangOptions::ClangABI::Ver11) { + mangleExpression(E, UnknownArity, /*AsTemplateArg=*/true); + return; + } + + // Prior to Clang 12, we didn't omit the X .. E around <expr-primary> + // correctly in cases where the template argument was + // constructed from an expression rather than an already-evaluated + // literal. In such a case, we would then e.g. emit 'XLi0EE' instead of + // 'Li0E'. + // + // We did special-case DeclRefExpr to attempt to DTRT for that one + // expression-kind, but while doing so, unfortunately handled ParmVarDecl + // (subtype of VarDecl) _incorrectly_, and emitted 'L_Z .. E' instead of + // the proper 'Xfp_E'. 
E = E->IgnoreParenImpCasts(); if (const DeclRefExpr *DRE = dyn_cast(E)) { const ValueDecl *D = DRE->getDecl(); @@ -5075,7 +5169,6 @@ void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) { return; } } - Out << 'X'; mangleExpression(E); Out << 'E'; diff --git a/clang/test/CodeGenCXX/clang-abi-compat.cpp b/clang/test/CodeGenCXX/clang-abi-compat.cpp index 46e7ed812cbc..caf06bd5f9f6 100644 --- a/clang/test/CodeGenCXX/clang-abi-compat.cpp +++ b/clang/test/CodeGenCXX/clang-abi-compat.cpp @@ -1,12 +1,23 @@ -// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fclang-abi-compat=3.0 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.0 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.8 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.9 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=4.0 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=5 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s -// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=11 %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s -// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fclang-abi-compat=latest %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,V12 %s -// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fclang-abi-compat=latest %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17 %s +// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s 
-emit-llvm -o - -Wno-c++11-extensions \ +// RUN: | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.8 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.9 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=4.0 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=5 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17,PRE12-CXX20 %s +// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - -Wno-c++11-extensions \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,V12 %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - \ +// RUN: | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20 %s typedef __attribute__((vector_size(8))) long long v1xi64; void clang39(v1xi64) {} @@ -55,3 +66,68 @@ template void clang12_b(); // CHECK: @_Z9clang12_cIXadL_Z3arrEEEvv template void clang12_c() {} template void 
clang12_c<&arr>(); + + +/// Tests for changes in clang12: +namespace expr_primary { +struct A { + template struct Int {}; + template struct Ref {}; +}; + +/// Check various DeclRefExpr manglings + +// PRE12: @_ZN12expr_primary5test1INS_1AEEEvNT_3IntIXLi1EEEE +// V12: @_ZN12expr_primary5test1INS_1AEEEvNT_3IntILi1EEE +template void test1(typename T::template Int<1> a) {} +template void test1(typename A::template Int<1> a); + +enum Enum { EnumVal = 4 }; +int Global; + +// PRE12: @_ZN12expr_primary5test2INS_1AEEEvNT_3IntIXLNS_4EnumE4EEEE +// V12: @_ZN12expr_primary5test2INS_1AEEEvNT_3IntILNS_4EnumE4EEE +template void test2(typename T::template Int a) {} +template void test2(typename A::template Int<4> a); + +// CHECK: @_ZN12expr_primary5test3ILi3EEEvNS_1A3IntIXT_EEE +template void test3(typename A::template Int a) {} +template void test3<3>(A::Int<3> a); + +#if __cplusplus >= 202002L +// CHECK-CXX20: @_ZN12expr_primary5test4INS_1AEEEvNT_3RefIL_ZNS_6GlobalEEEE +template void test4(typename T::template Ref<(Global)> a) {} +template void test4(typename A::template Ref a); + +struct B { + struct X { + constexpr X(double) {} + constexpr X(int&) {} + }; + template struct Y {}; +}; + +// PRE12-CXX20: _ZN12expr_primary5test5INS_1BEEEvNT_1YIXLd3ff0000000000000EEEE +// V12-CXX20: _ZN12expr_primary5test5INS_1BEEEvNT_1YILd3ff0000000000000EEE +template void test5(typename T::template Y<1.0>) { } +template void test5(typename B::Y<1.0>); + +// PRE12-CXX20: @_ZN12expr_primary5test6INS_1BEEENT_1YIL_ZZNS_5test6EiE1bEEEi +// V12-CXX20: @_ZN12expr_primary5test6INS_1BEEENT_1YIXfp_EEEi +template auto test6(int b) -> typename T::template Y { return {}; } +template auto test6(int b) -> B::Y; +#endif + +/// Verify non-dependent type-traits within a dependent template arg. 
+ +// PRE12: @_ZN12expr_primary5test7INS_1AEEEvNT_3IntIXLm1EEEE +// V12: @_ZN12expr_primary5test7INS_1AEEEvNT_3IntILm1EEE +template void test7(typename T::template Int a) {} +template void test7(A::Int<1>); + +// PRE12: @_ZN12expr_primary5test8ILi2EEEvu11matrix_typeIXLi1EEXT_EiE +// V12: @_ZN12expr_primary5test8ILi2EEEvu11matrix_typeILi1EXT_EiE +template using matrix1xN = int __attribute__((matrix_type(1, N))); +template void test8(matrix1xN a) {} +template void test8<2>(matrix1xN<2> a); +} diff --git a/clang/test/CodeGenCXX/mangle-abi-tag.cpp b/clang/test/CodeGenCXX/mangle-abi-tag.cpp index 5d84096d24cd..9e26604a2c44 100644 --- a/clang/test/CodeGenCXX/mangle-abi-tag.cpp +++ b/clang/test/CodeGenCXX/mangle-abi-tag.cpp @@ -225,7 +225,7 @@ namespace pr30440 { template void g(F); template auto h(A ...a)->decltype (g (0, g < a > (a) ...)) { } -// CHECK-DAG: define {{.*}} @_ZN7pr304401hIJEEEDTcl1gLi0Espcl1gIL_ZZNS_1hEDpT_E1aEEfp_EEES2_( +// CHECK-DAG: define {{.*}} @_ZN7pr304401hIJEEEDTcl1gLi0Espcl1gIXfp_EEfp_EEEDpT_( void pr30440_test () { h(); diff --git a/clang/test/CodeGenCXX/mangle-concept.cpp b/clang/test/CodeGenCXX/mangle-concept.cpp index b0fcd586727e..e60e6348a5f6 100644 --- a/clang/test/CodeGenCXX/mangle-concept.cpp +++ b/clang/test/CodeGenCXX/mangle-concept.cpp @@ -6,11 +6,11 @@ template struct S {}; template concept C = true; template S> f0() { return S>{}; } template S> f0<>(); -// CHECK: @_ZN5test12f0IiEENS_1SIXL_ZNS_1CIT_EEEEEEv( +// CHECK: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv( } template struct S {}; template concept C = true; template S> f0() { return S>{}; } template S> f0<>(); -// CHECK: @_Z2f0IiE1SIXL_Z1CIT_EEEEv( +// CHECK: @_Z2f0IiE1SIL_Z1CIT_EEEv( diff --git a/clang/test/CodeGenCXX/mangle-template.cpp b/clang/test/CodeGenCXX/mangle-template.cpp index 40688de7e12e..9b80a6d64695 100644 --- a/clang/test/CodeGenCXX/mangle-template.cpp +++ b/clang/test/CodeGenCXX/mangle-template.cpp @@ -270,7 +270,7 @@ namespace test17 { // Note: there is no J...E 
here, because we can't form a pack argument, and // the 5u and 6u are mangled with the original type 'j' (unsigned int) not // with the resolved type 'i' (signed int). - // CHECK: define {{.*}} @_ZN6test171hILi4EJLi1ELi2ELi3EEEEvNS_1XIXspT0_EXLj5EEXT_EXLj6EEEE + // CHECK: define {{.*}} @_ZN6test171hILi4EJLi1ELi2ELi3EEEEvNS_1XIXspT0_ELj5EXT_ELj6EEE template void h(X) {} void i() { h<4, 1, 2, 3>({}); } @@ -323,7 +323,7 @@ namespace partially_dependent_template_args { // callee is unresolved, the rest mangle the converted argument Lj0E // because the callee is resolved. void h() { - // CHECK: @_ZN33partially_dependent_template_args5test22g1INS0_1XEEEvDTcl1fIXLi0EEEcvT__EEE + // CHECK: @_ZN33partially_dependent_template_args5test22g1INS0_1XEEEvDTcl1fILi0EEcvT__EEE g1({}); // CHECK: @_ZN33partially_dependent_template_args5test22g2IiEEvDTplclL_ZNS0_1fILj0EEEiNS0_1XEEilEEcvT__EE g2({}); diff --git a/clang/test/CodeGenCXX/mangle.cpp b/clang/test/CodeGenCXX/mangle.cpp index f8ea9960a5c5..6cec33e3758e 100644 --- a/clang/test/CodeGenCXX/mangle.cpp +++ b/clang/test/CodeGenCXX/mangle.cpp @@ -1123,7 +1123,7 @@ namespace test56 { namespace test57 { struct X { template int f(); } x; template void f(decltype(x.f<0>() + N)) {} - // CHECK-LABEL: @_ZN6test571fILi0EEEvDTplcldtL_ZNS_1xEE1fIXLi0EEEET_E + // CHECK-LABEL: @_ZN6test571fILi0EEEvDTplcldtL_ZNS_1xEE1fILi0EEET_E template void f<0>(int); } diff --git a/clang/test/CodeGenCXX/matrix-type.cpp b/clang/test/CodeGenCXX/matrix-type.cpp index 9bde12e13b86..9e715e10ce1c 100644 --- a/clang/test/CodeGenCXX/matrix-type.cpp +++ b/clang/test/CodeGenCXX/matrix-type.cpp @@ -215,14 +215,14 @@ void test_template_deduction() { // CHECK-NEXT: %m4 = alloca [144 x float], align 4 // CHECK-NEXT: %v = alloca %struct.selector.3, align 1 // CHECK-NEXT: %undef.agg.tmp4 = alloca %struct.selector.3, align 1 - // CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeIXLm10EEXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m0) + 
// CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m0) // CHECK-NEXT: call void @_Z10use_matrixIiE8selectorILi2EERu11matrix_typeILm10ELm10ET_E([100 x i32]* nonnull align 4 dereferenceable(400) %m1) - // CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_EXLm10EET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m2) + // CHECK-NEXT: call void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m2) // CHECK-NEXT: call void @_Z10use_matrixIiLm12ELm12EE8selectorILi0EERu11matrix_typeIXT0_EXT1_ET_E([144 x i32]* nonnull align 4 dereferenceable(576) %m3) // CHECK-NEXT: call void @_Z10use_matrixILm12ELm12EE8selectorILi4EERu11matrix_typeIXT_EXT0_EfE([144 x float]* nonnull align 4 dereferenceable(576) %m4) // CHECK-NEXT: ret void - // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeIXLm10EEXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) + // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) // CHECK-NEXT: entry: // CHECK-NEXT: %m.addr = alloca [120 x i32]*, align 8 // CHECK-NEXT: store [120 x i32]* %m, [120 x i32]** %m.addr, align 8 @@ -236,7 +236,7 @@ void test_template_deduction() { // CHECK-NEXT: call void @llvm.trap() // CHECK-NEXT: unreachable - // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_EXLm10EET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) + // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m) // CHECK-NEXT: entry: // CHECK-NEXT: %m.addr = alloca [120 x i32]*, align 8 // CHECK-NEXT: store [120 x i32]* %m, [120 x i32]** %m.addr, align 8 @@ 
-277,10 +277,10 @@ void test_auto_t() { // CHECK-LABEL: define{{.*}} void @_Z11test_auto_tv() // CHECK-NEXT: entry: // CHECK-NEXT: %m = alloca [130 x i32], align 4 - // CHECK-NEXT: call void @_Z3fooILm13EEvRu11matrix_typeIXT_EXLm10EEiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) + // CHECK-NEXT: call void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) // CHECK-NEXT: ret void - // CHECK-LABEL: define linkonce_odr void @_Z3fooILm13EEvRu11matrix_typeIXT_EXLm10EEiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) + // CHECK-LABEL: define linkonce_odr void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m) // CHECK-NEXT: entry: // CHECK-NEXT: %m.addr = alloca [130 x i32]*, align 8 // CHECK-NEXT: store [130 x i32]* %m, [130 x i32]** %m.addr, align 8 @@ -326,7 +326,7 @@ void test_use_matrix_2() { // CHECK-NEXT: store <40 x float> %call, <40 x float>* %0, align 4 // CHECK-NEXT: call void @_Z12use_matrix_2ILm2ELm12EE8selectorILi0EERu11matrix_typeIXplT_Li2EEXdvT0_Li2EEiERu11matrix_typeIXT_EXT0_EfE([24 x i32]* nonnull align 4 dereferenceable(96) %m1, [24 x float]* nonnull align 4 dereferenceable(96) %m2) // CHECK-NEXT: call void @_Z12use_matrix_2ILm5ELm8EE8selectorILi1EERu11matrix_typeIXplT_T0_EXT0_EiERu11matrix_typeIXT_EXmiT0_T_EfE([104 x i32]* nonnull align 4 dereferenceable(416) %m3, [15 x float]* nonnull align 4 dereferenceable(60) %m4) - // CHECK-NEXT: %call2 = call <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_EXLm10EEiE([50 x i32]* nonnull align 4 dereferenceable(200) %m5) + // CHECK-NEXT: %call2 = call <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m5) // CHECK-NEXT: %1 = bitcast [20 x float]* %r4 to <20 x float>* // CHECK-NEXT: store <20 x float> %call2, <20 x float>* %1, align 4 // CHECK-NEXT: call void 
@_Z12use_matrix_3ILm6EE8selectorILi2EERu11matrix_typeIXmiT_Li2EEXT_EiE([24 x i32]* nonnull align 4 dereferenceable(96) %m1) @@ -357,7 +357,7 @@ void test_use_matrix_2() { // CHECK-NEXT: call void @llvm.trap() // CHECK-NEXT: unreachable - // CHECK-LABEL: define linkonce_odr <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_EXLm10EEiE([50 x i32]* nonnull align 4 dereferenceable(200) %m1) + // CHECK-LABEL: define linkonce_odr <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m1) // CHECK-NEXT: entry: // CHECK-NEXT: %m1.addr = alloca [50 x i32]*, align 8 // CHECK-NEXT: store [50 x i32]* %m1, [50 x i32]** %m1.addr, align 8 diff --git a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp index 321f65cacc71..5c02b1eb014c 100644 --- a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp +++ b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp @@ -60,14 +60,12 @@ int main(int argc, const char * argv[]) // CHECK-V11: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( // CHECK-V12: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( // CHECK-V11: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( -// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( -// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ +// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofL_ZN9HasMember6memberEEEE( // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev // CHECK-V11: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E( // CHECK-V12: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE( // CHECK-V11: define linkonce_odr 
void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE( // CHECK-V12: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE( // CHECK-V11: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE( -// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE( -// TODO: the above mangling is wrong -- the X/E shouldn't be emitted: ^ ^ +// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofL_ZN9HasMember6memberEEEE( // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC2Ev From 0b7b698fecd37415a635a586e5ca159ab0b8872f Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Sun, 24 Jan 2021 16:23:58 -0500 Subject: [PATCH 011/244] Itanium Mangling: In 'enable_if', omit X/E around <expr-primary>. The Clang enable_if extension is mangled as an <extended-qualifier>, which is supposed to contain <template-args>. However, we were unconditionally emitting X/E around its arguments, neglecting the fact that <expr-primary> should be emitted directly without the surrounding X/E. 
Differential Revision: https://reviews.llvm.org/D95488 (cherry picked from commit a7246ba02a8923f316419a62d836dbe1c0b437bd) --- clang/lib/AST/ItaniumMangle.cpp | 14 +++++++-- clang/test/CodeGen/enable_if.c | 34 +++++++++++----------- clang/test/CodeGenCXX/clang-abi-compat.cpp | 5 ++++ clang/test/CodeGenCXX/enable_if.cpp | 2 +- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 54e2f361a9f1..4420f6a2c1c3 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -727,9 +727,17 @@ void CXXNameMangler::mangleFunctionEncodingBareType(const FunctionDecl *FD) { EnableIfAttr *EIA = dyn_cast(*I); if (!EIA) continue; - Out << 'X'; - mangleExpression(EIA->getCond()); - Out << 'E'; + if (Context.getASTContext().getLangOpts().getClangABICompat() > + LangOptions::ClangABI::Ver11) { + mangleTemplateArgExpr(EIA->getCond()); + } else { + // Prior to Clang 12, we hardcoded the X/E around enable-if's argument, + // even though <template-arg> should not include an X/E around + // <expr-primary>. 
+ Out << 'X'; + mangleExpression(EIA->getCond()); + Out << 'E'; + } } Out << 'E'; FunctionTypeDepth.pop(Saved); diff --git a/clang/test/CodeGen/enable_if.c b/clang/test/CodeGen/enable_if.c index 14550b9e2db9..327a201cdeba 100644 --- a/clang/test/CodeGen/enable_if.c +++ b/clang/test/CodeGen/enable_if.c @@ -31,22 +31,22 @@ void bar(int m) __attribute__((overloadable, enable_if(m > 0, ""))); void bar(int m) __attribute__((overloadable, enable_if(1, ""))); // CHECK-LABEL: define{{.*}} void @test2 void test2() { - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi void (*p)(int) = bar; - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi void (*p2)(int) = &bar; - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi p = bar; - // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi p = &bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) void *vp1 = (void*)&bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) void *vp2 = (void*)bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) vp1 = (void*)&bar; - // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*) + // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*) vp1 = (void*)bar; } @@ -54,13 +54,13 @@ void baz(int m) __attribute__((overloadable, enable_if(1, ""))); void baz(int m) __attribute__((overloadable)); // CHECK-LABEL: define{{.*}} void @test3 void test3() { - // CHECK: store void (i32)* 
@_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi void (*p)(int) = baz; - // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi void (*p2)(int) = &baz; - // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi p = baz; - // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi + // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi p = &baz; } @@ -71,13 +71,13 @@ void qux(int m) __attribute__((overloadable, enable_if(1, ""), void qux(int m) __attribute__((overloadable, enable_if(1, ""))); // CHECK-LABEL: define{{.*}} void @test4 void test4() { - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi void (*p)(int) = qux; - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi void (*p2)(int) = &qux; - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi p = qux; - // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi + // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi p = &qux; } @@ -90,6 +90,6 @@ void test5() { int foo(char *i __attribute__((pass_object_size(0)))) __attribute__((enable_if(1, ""), overloadable)); - // CHECK: call i32 @_Z3fooUa9enable_ifIXLi1EEEPcU17pass_object_size0 + // CHECK: call i32 @_Z3fooUa9enable_ifILi1EEPcU17pass_object_size0 foo((void*)0); } diff --git a/clang/test/CodeGenCXX/clang-abi-compat.cpp b/clang/test/CodeGenCXX/clang-abi-compat.cpp index caf06bd5f9f6..80311aa320fe 100644 --- a/clang/test/CodeGenCXX/clang-abi-compat.cpp +++ b/clang/test/CodeGenCXX/clang-abi-compat.cpp @@ -130,4 +130,9 @@ template void test7(A::Int<1>); template using matrix1xN = int __attribute__((matrix_type(1, N))); template void test8(matrix1xN a) {} template void test8<2>(matrix1xN<2> 
a); + +// PRE12: @_ZN12expr_primary5test9EUa9enable_ifIXLi1EEEv +// V12: @_ZN12expr_primary5test9EUa9enable_ifILi1EEv +void test9(void) __attribute__((enable_if(1, ""))) {} + } diff --git a/clang/test/CodeGenCXX/enable_if.cpp b/clang/test/CodeGenCXX/enable_if.cpp index 4e7707aaeed9..70386b87fcee 100644 --- a/clang/test/CodeGenCXX/enable_if.cpp +++ b/clang/test/CodeGenCXX/enable_if.cpp @@ -5,7 +5,7 @@ int test5(int); template T test5(T) __attribute__((enable_if(1, "better than non-template"))); -// CHECK: @_Z5test5IiEUa9enable_ifIXLi1EEET_S0_ +// CHECK: @_Z5test5IiEUa9enable_ifILi1EET_S0_ int (*Ptr)(int) = &test5; // Test itanium mangling for attribute enable_if From de3396d89d998769c3310c23bdd49babade9d874 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 28 Jan 2021 15:30:21 -0800 Subject: [PATCH 012/244] workflows: Update branch names Also remove main-branch-sync workflow that was removed from the main branch. --- .github/workflows/clang-tests.yml | 6 +++--- .github/workflows/libclc-tests.yml | 6 +++--- .github/workflows/lld-tests.yml | 6 +++--- .github/workflows/lldb-tests.yml | 6 +++--- .github/workflows/llvm-tests.yml | 10 +++++----- .github/workflows/main-branch-sync.yml | 25 ------------------------- 6 files changed, 17 insertions(+), 42 deletions(-) delete mode 100644 .github/workflows/main-branch-sync.yml diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml index f8ca65e10726..af0b5eabeeda 100644 --- a/.github/workflows/clang-tests.yml +++ b/.github/workflows/clang-tests.yml @@ -28,16 +28,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Test clang - uses: llvm/actions/build-test-llvm-project@master + uses: 
llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release build_target: check-clang diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml index 4e8639b1c89a..2f1eb2939ea2 100644 --- a/.github/workflows/libclc-tests.yml +++ b/.github/workflows/libclc-tests.yml @@ -31,16 +31,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Build clang - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release build_target: "" diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml index 9b4cbe95f231..bdf0c2fcd886 100644 --- a/.github/workflows/lld-tests.yml +++ b/.github/workflows/lld-tests.yml @@ -28,16 +28,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Test lld - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="lld" -DCMAKE_BUILD_TYPE=Release build_target: check-lld diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml index 229e6deece6e..93fddc2de8c6 100644 --- a/.github/workflows/lldb-tests.yml +++ b/.github/workflows/lldb-tests.yml @@ -31,16 +31,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: 
llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Build lldb - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: # Mac OS requries that libcxx is enabled for lldb tests, so we need to disable them. cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang;lldb" -DCMAKE_BUILD_TYPE=Release -DLLDB_INCLUDE_TESTS=OFF diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 67f318ad849f..675383407d64 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -29,16 +29,16 @@ jobs: steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') - uses: llvm/actions/setup-windows@master + uses: llvm/actions/setup-windows@main with: arch: amd64 - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - uses: actions/checkout@v1 with: fetch-depth: 1 - name: Test llvm - uses: llvm/actions/build-test-llvm-project@master + uses: llvm/actions/build-test-llvm-project@main with: cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release @@ -60,7 +60,7 @@ jobs: repo: ${{ github.repository }} steps: - name: Install Ninja - uses: llvm/actions/install-ninja@master + uses: llvm/actions/install-ninja@main - name: Install abi-compliance-checker run: | sudo apt-get install abi-dumper autoconf pkg-config @@ -72,7 +72,7 @@ jobs: ./configure sudo make install - name: Download source code - uses: llvm/actions/get-llvm-project-src@master + uses: llvm/actions/get-llvm-project-src@main with: ref: ${{ matrix.ref }} repo: ${{ matrix.repo }} diff --git a/.github/workflows/main-branch-sync.yml b/.github/workflows/main-branch-sync.yml deleted file mode 100644 index 5ea360e281d6..000000000000 --- a/.github/workflows/main-branch-sync.yml +++ /dev/null @@ -1,25 
+0,0 @@ -name: main branch sync - -on: - push: - branches: - - 'main' - -jobs: - branch_sync: - runs-on: ubuntu-latest - steps: - - name: Checkout Code - uses: actions/checkout@v2 - with: - # persist-credentials: false allows us to use our own credentials for - # pushing to the repository. Otherwise, the default github actions token - # is used. - persist-credentials: false - fetch-depth: 0 - - - name: Update branch - env: - LLVMBOT_TOKEN: ${{ secrets.LLVMBOT_MAIN_SYNC }} - run: | - git push https://$LLVMBOT_TOKEN@github.com/${{ github.repository }} HEAD:master From 0a32d93bd95b7ad0a4c7f91955c6c815150df84c Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Wed, 27 Jan 2021 09:14:22 +0100 Subject: [PATCH 013/244] [clang-format] Avoid considering include directive as a template closer. This fixes a bug [[ http://llvm.org/PR48891 | PR48891 ]] introduced in D93839 where: ``` #include <stdint.h> namespace rep {} ``` got formatted as ``` #include <stdint.h> namespace rep { } ``` Reviewed By: MyDeveloperDay, leonardchan Differential Revision: https://reviews.llvm.org/D95479 (cherry picked from commit e3713f156b8cb65a2b74f150afb824ce1e2a2fab) --- clang/lib/Format/UnwrappedLineFormatter.cpp | 2 +- clang/unittests/Format/FormatTest.cpp | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index d1138bbc9c36..5dd0ccdfa6fd 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -371,7 +371,7 @@ class LineJoiner { if (Previous->is(tok::comment)) Previous = Previous->getPreviousNonComment(); if (Previous) { - if (Previous->is(tok::greater)) + if (Previous->is(tok::greater) && !I[-1]->InPPDirective) return 0; if (Previous->is(tok::identifier)) { const FormatToken *PreviousPrevious = diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 855cf0242fe9..c1f88b9ae17a 100644 --- 
a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -10248,6 +10248,21 @@ TEST_F(FormatTest, SplitEmptyClass) { "{\n" "};", Style); + + verifyFormat("#include \"stdint.h\"\n" + "namespace rep {}", + Style); + verifyFormat("#include <stdint.h>\n" + "namespace rep {}", + Style); + verifyFormat("#include <stdint.h>\n" + "namespace rep {}", + "#include <stdint.h>\n" + "namespace rep {\n" + "\n" + "\n" + "}", + Style); } TEST_F(FormatTest, SplitEmptyStruct) { From 8c5d184ef714dcf435784e21e66b4b5e25b2dffb Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 27 Jan 2021 16:51:27 -0500 Subject: [PATCH 014/244] clang: Fix static_assert in a few contexts in microsoft mode Follow-up to D17444. Fixes PR48904. See bug for details. Differential Revision: https://reviews.llvm.org/D95559 (cherry picked from commit 764a7a2155c6747ec8d0b38d8edbb65960eae874) --- clang/lib/Parse/ParseDecl.cpp | 3 ++- clang/test/Sema/static-assert.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 571164139630..347d992b1643 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4216,7 +4216,7 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, } // Parse _Static_assert declaration. - if (Tok.is(tok::kw__Static_assert)) { + if (Tok.isOneOf(tok::kw__Static_assert, tok::kw_static_assert)) { SourceLocation DeclEnd; ParseStaticAssertDeclaration(DeclEnd); continue; @@ -5180,6 +5180,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_friend: // static_assert-declaration + case tok::kw_static_assert: case tok::kw__Static_assert: // GNU typeof support. 
diff --git a/clang/test/Sema/static-assert.c b/clang/test/Sema/static-assert.c index f08e557fc8ea..9105f2366985 100644 --- a/clang/test/Sema/static-assert.c +++ b/clang/test/Sema/static-assert.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -std=c11 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fms-compatibility -DMS -fsyntax-only -verify %s // RUN: %clang_cc1 -std=c99 -pedantic -fsyntax-only -verify=expected,ext %s // RUN: %clang_cc1 -xc++ -std=c++11 -pedantic -fsyntax-only -verify=expected,ext,cxx %s @@ -11,10 +12,17 @@ _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 ex _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \ // ext-warning {{'_Static_assert' is a C11 extension}} +#ifdef MS +static_assert(1, "1 is nonzero"); +#endif + void foo(void) { _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 extension}} _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \ // ext-warning {{'_Static_assert' is a C11 extension}} +#ifdef MS + static_assert(1, "1 is nonzero"); +#endif } _Static_assert(1, invalid); // expected-error {{expected string literal for diagnostic message in static_assert}} \ @@ -25,6 +33,9 @@ struct A { _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 extension}} _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \ // ext-warning {{'_Static_assert' is a C11 extension}} +#ifdef MS + static_assert(1, "1 is nonzero"); +#endif }; #ifdef __cplusplus From 1edbbf9d20d9f859f7ff2a146a501aeb1423141e Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 20 Jan 2021 12:38:32 +0100 Subject: [PATCH 015/244] [clangd] Log warning when using legacy (theia) semantic highlighting. The legacy protocol will be removed on trunk after the 12 branch cut, and gone in clangd 13. 
Differential Revision: https://reviews.llvm.org/D95031 (cherry picked from commit 29472bb76915c4929aecc938300f6df31f63ac29) --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index dc89ebd59fe2..35aed2166f03 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -510,6 +510,11 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, "semanticTokens request, choosing the latter (no notifications)."); Opts.TheiaSemanticHighlighting = false; } + if (Opts.TheiaSemanticHighlighting) { + log("Using legacy semanticHighlights notification, which will be removed " + "in clangd 13. Clients should use the standard semanticTokens " + "request instead."); + } if (Params.rootUri && *Params.rootUri) Opts.WorkspaceRoot = std::string(Params.rootUri->file()); From 61e05d1bc1af737c5f24fd5cd765f1a9914cbd13 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 25 Jan 2021 16:16:22 +0100 Subject: [PATCH 016/244] [clangd] Parse Diagnostics block, and nest ClangTidy block under it. 
(ClangTidy configuration block hasn't been in any release, so we should be OK to move it around like this) Differential Revision: https://reviews.llvm.org/D95362 (cherry picked from commit c3df9d58c75e0f89ca95e947804d65e79a491adc) --- clang-tools-extra/clangd/Config.h | 15 +++--- clang-tools-extra/clangd/ConfigCompile.cpp | 14 +++--- clang-tools-extra/clangd/ConfigFragment.h | 47 +++++++++---------- clang-tools-extra/clangd/ConfigYAML.cpp | 14 +++++- clang-tools-extra/clangd/TidyProvider.cpp | 2 +- .../clangd/unittests/ConfigCompileTests.cpp | 32 +++++++------ .../clangd/unittests/ConfigYAMLTests.cpp | 11 +++-- 7 files changed, 75 insertions(+), 60 deletions(-) diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h index 44ca283b6a0e..391632cb086a 100644 --- a/clang-tools-extra/clangd/Config.h +++ b/clang-tools-extra/clangd/Config.h @@ -90,6 +90,13 @@ struct Config { struct { bool SuppressAll = false; llvm::StringSet<> Suppress; + + /// Configures what clang-tidy checks to run and options to use with them. + struct { + // A comma-seperated list of globs specify which clang-tidy checks to run. + std::string Checks; + llvm::StringMap CheckOptions; + } ClangTidy; } Diagnostics; /// Style of the codebase. @@ -99,14 +106,6 @@ struct Config { // ::). All nested namespaces are affected as well. std::vector FullyQualifiedNamespaces; } Style; - - /// Configures what clang-tidy checks to run and options to use with them. - struct { - // A comma-seperated list of globs to specify which clang-tidy checks to - // run. 
- std::string Checks; - llvm::StringMap CheckOptions; - } ClangTidy; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp index e82c6e159421..8682cae36f26 100644 --- a/clang-tools-extra/clangd/ConfigCompile.cpp +++ b/clang-tools-extra/clangd/ConfigCompile.cpp @@ -189,7 +189,6 @@ struct FragmentCompiler { compile(std::move(F.CompileFlags)); compile(std::move(F.Index)); compile(std::move(F.Diagnostics)); - compile(std::move(F.ClangTidy)); } void compile(Fragment::IfBlock &&F) { @@ -379,6 +378,8 @@ struct FragmentCompiler { for (llvm::StringRef N : Normalized) C.Diagnostics.Suppress.insert(N); }); + + compile(std::move(F.ClangTidy)); } void compile(Fragment::StyleBlock &&F) { @@ -422,7 +423,7 @@ struct FragmentCompiler { CurSpec += Str; } - void compile(Fragment::ClangTidyBlock &&F) { + void compile(Fragment::DiagnosticsBlock::ClangTidyBlock &&F) { std::string Checks; for (auto &CheckGlob : F.Add) appendTidyCheckSpec(Checks, CheckGlob, true); @@ -433,8 +434,9 @@ struct FragmentCompiler { if (!Checks.empty()) Out.Apply.push_back( [Checks = std::move(Checks)](const Params &, Config &C) { - C.ClangTidy.Checks.append( - Checks, C.ClangTidy.Checks.empty() ? /*skip comma*/ 1 : 0, + C.Diagnostics.ClangTidy.Checks.append( + Checks, + C.Diagnostics.ClangTidy.Checks.empty() ? 
/*skip comma*/ 1 : 0, std::string::npos); }); if (!F.CheckOptions.empty()) { @@ -445,8 +447,8 @@ struct FragmentCompiler { Out.Apply.push_back( [CheckOptions = std::move(CheckOptions)](const Params &, Config &C) { for (auto &StringPair : CheckOptions) - C.ClangTidy.CheckOptions.insert_or_assign(StringPair.first, - StringPair.second); + C.Diagnostics.ClangTidy.CheckOptions.insert_or_assign( + StringPair.first, StringPair.second); }); } } diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h index 5b67c49fe154..c36b07f5e8e2 100644 --- a/clang-tools-extra/clangd/ConfigFragment.h +++ b/clang-tools-extra/clangd/ConfigFragment.h @@ -203,6 +203,29 @@ struct Fragment { /// (e.g. by disabling a clang-tidy check, or the -Wunused compile flag). /// This often has other advantages, such as skipping some analysis. std::vector> Suppress; + + /// Controls how clang-tidy will run over the code base. + /// + /// The settings are merged with any settings found in .clang-tidy + /// configiration files with these ones taking precedence. + struct ClangTidyBlock { + std::vector> Add; + /// List of checks to disable. + /// Takes precedence over Add. To enable all llvm checks except include + /// order: + /// Add: llvm-* + /// Remove: llvm-include-onder + std::vector> Remove; + + /// A Key-Value pair list of options to pass to clang-tidy checks + /// These take precedence over options specified in clang-tidy + /// configuration files. Example: + /// CheckOptions: + /// readability-braces-around-statements.ShortStatementLines: 2 + std::vector, Located>> + CheckOptions; + }; + ClangTidyBlock ClangTidy; }; DiagnosticsBlock Diagnostics; @@ -215,30 +238,6 @@ struct Fragment { std::vector> FullyQualifiedNamespaces; }; StyleBlock Style; - - /// Controls how clang-tidy will run over the code base. - /// - /// The settings are merged with any settings found in .clang-tidy - /// configiration files with these ones taking precedence. 
- // FIXME: move this to Diagnostics.Tidy. - struct ClangTidyBlock { - std::vector> Add; - /// List of checks to disable. - /// Takes precedence over Add. To enable all llvm checks except include - /// order: - /// Add: llvm-* - /// Remove: llvm-include-onder - std::vector> Remove; - - /// A Key-Value pair list of options to pass to clang-tidy checks - /// These take precedence over options specified in clang-tidy configuration - /// files. Example: - /// CheckOptions: - /// readability-braces-around-statements.ShortStatementLines: 2 - std::vector, Located>> - CheckOptions; - }; - ClangTidyBlock ClangTidy; }; } // namespace config diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp index 7aaff5565497..348ee9dd1f75 100644 --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -62,7 +62,7 @@ class Parser { Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); }); Dict.handle("Index", [&](Node &N) { parse(F.Index, N); }); Dict.handle("Style", [&](Node &N) { parse(F.Style, N); }); - Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); }); + Dict.handle("Diagnostics", [&](Node &N) { parse(F.Diagnostics, N); }); Dict.parse(N); return !(N.failed() || HadError); } @@ -110,7 +110,17 @@ class Parser { Dict.parse(N); } - void parse(Fragment::ClangTidyBlock &F, Node &N) { + void parse(Fragment::DiagnosticsBlock &F, Node &N) { + DictParser Dict("Diagnostics", this); + Dict.handle("Suppress", [&](Node &N) { + if (auto Values = scalarValues(N)) + F.Suppress = std::move(*Values); + }); + Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); }); + Dict.parse(N); + } + + void parse(Fragment::DiagnosticsBlock::ClangTidyBlock &F, Node &N) { DictParser Dict("ClangTidy", this); Dict.handle("Add", [&](Node &N) { if (auto Values = scalarValues(N)) diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp index 
0a9f12221287..c26c59fd347d 100644 --- a/clang-tools-extra/clangd/TidyProvider.cpp +++ b/clang-tools-extra/clangd/TidyProvider.cpp @@ -255,7 +255,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef ExtraBadChecks) { TidyProviderRef provideClangdConfig() { return [](tidy::ClangTidyOptions &Opts, llvm::StringRef) { - const auto &CurTidyConfig = Config::current().ClangTidy; + const auto &CurTidyConfig = Config::current().Diagnostics.ClangTidy; if (!CurTidyConfig.Checks.empty()) mergeCheckList(Opts.Checks, CurTidyConfig.Checks); diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp index ef24b5d8417f..4b1da2035727 100644 --- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp @@ -259,32 +259,36 @@ TEST_F(ConfigCompileTests, DiagnosticSuppression) { } TEST_F(ConfigCompileTests, Tidy) { - Frag.ClangTidy.Add.emplace_back("bugprone-use-after-move"); - Frag.ClangTidy.Add.emplace_back("llvm-*"); - Frag.ClangTidy.Remove.emplace_back("llvm-include-order"); - Frag.ClangTidy.Remove.emplace_back("readability-*"); - Frag.ClangTidy.CheckOptions.emplace_back( + auto &Tidy = Frag.Diagnostics.ClangTidy; + Tidy.Add.emplace_back("bugprone-use-after-move"); + Tidy.Add.emplace_back("llvm-*"); + Tidy.Remove.emplace_back("llvm-include-order"); + Tidy.Remove.emplace_back("readability-*"); + Tidy.CheckOptions.emplace_back( std::make_pair(std::string("StrictMode"), std::string("true"))); - Frag.ClangTidy.CheckOptions.emplace_back(std::make_pair( + Tidy.CheckOptions.emplace_back(std::make_pair( std::string("example-check.ExampleOption"), std::string("0"))); EXPECT_TRUE(compileAndApply()); EXPECT_EQ( - Conf.ClangTidy.Checks, + Conf.Diagnostics.ClangTidy.Checks, "bugprone-use-after-move,llvm-*,-llvm-include-order,-readability-*"); - EXPECT_EQ(Conf.ClangTidy.CheckOptions.size(), 2U); - EXPECT_EQ(Conf.ClangTidy.CheckOptions.lookup("StrictMode"), 
"true"); - EXPECT_EQ(Conf.ClangTidy.CheckOptions.lookup("example-check.ExampleOption"), + EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.size(), 2U); + EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.lookup("StrictMode"), + "true"); + EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.lookup( + "example-check.ExampleOption"), "0"); EXPECT_THAT(Diags.Diagnostics, IsEmpty()); } TEST_F(ConfigCompileTests, TidyBadChecks) { - Frag.ClangTidy.Add.emplace_back("unknown-check"); - Frag.ClangTidy.Remove.emplace_back("*"); - Frag.ClangTidy.Remove.emplace_back("llvm-includeorder"); + auto &Tidy = Frag.Diagnostics.ClangTidy; + Tidy.Add.emplace_back("unknown-check"); + Tidy.Remove.emplace_back("*"); + Tidy.Remove.emplace_back("llvm-includeorder"); EXPECT_TRUE(compileAndApply()); // Ensure bad checks are stripped from the glob. - EXPECT_EQ(Conf.ClangTidy.Checks, "-*"); + EXPECT_EQ(Conf.Diagnostics.ClangTidy.Checks, "-*"); EXPECT_THAT( Diags.Diagnostics, ElementsAre( diff --git a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp index 25d468ba604a..e1c81344de20 100644 --- a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp +++ b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp @@ -60,10 +60,11 @@ CompileFlags: { Add: [foo, bar] } Index: Background: Skip --- -ClangTidy: - CheckOptions: - IgnoreMacros: true - example-check.ExampleOption: 0 +Diagnostics: + ClangTidy: + CheckOptions: + IgnoreMacros: true + example-check.ExampleOption: 0 )yaml"; auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback()); EXPECT_THAT(Diags.Diagnostics, IsEmpty()); @@ -77,7 +78,7 @@ CompileFlags: { Add: [foo, bar] } ASSERT_TRUE(Results[2].Index.Background); EXPECT_EQ("Skip", *Results[2].Index.Background.getValue()); - EXPECT_THAT(Results[3].ClangTidy.CheckOptions, + EXPECT_THAT(Results[3].Diagnostics.ClangTidy.CheckOptions, ElementsAre(PairVal("IgnoreMacros", "true"), PairVal("example-check.ExampleOption", "0"))); 
} From 074ad6de6fae20ff7ff720f79df1d6c1a7845157 Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Fri, 29 Jan 2021 13:16:41 +0300 Subject: [PATCH 017/244] [OpenMP] libomp: fix build by cl with vs2019 Replace VLA with dynamic allocation using alloca(). This fixes https://bugs.llvm.org/show_bug.cgi?id=48919. Differential Revision: https://reviews.llvm.org/D95627 (cherry picked from commit 7f5ad0e07162e0c19e569986ee37a17c147c9a27) --- openmp/runtime/src/kmp_settings.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index a8522130f972..b477edbbfb42 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -3355,7 +3355,8 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value, ntraits++; } } - omp_alloctrait_t traits[ntraits]; + omp_alloctrait_t *traits = + (omp_alloctrait_t *)KMP_ALLOCA(ntraits * sizeof(omp_alloctrait_t)); // Helper macros #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) From 99f43f598907a9cc1a613c691ffbce7c8bd4ec75 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 28 Jan 2021 14:37:33 +0100 Subject: [PATCH 018/244] Relax test expectations in debug-info-gline-tables-only-codeview.cpp To make it pass also on 32-bit Windows, see PR48920. 
(cherry picked from commit 0024efc69ea6cd0b630cd11cef5991b7edb73ffc) --- clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp b/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp index 27ac682c10f5..409b62da62c1 100644 --- a/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp +++ b/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp @@ -25,6 +25,6 @@ void test() { // CHECK: ![[C]] = !DICompositeType(tag: DW_TAG_structure_type, name: "C", // CHECK-SAME: flags: DIFlagFwdDecl // CHECK-NOT: identifier - // CHECK: ![[MTYPE]] = !DISubroutineType(types: !{{.*}}) + // CHECK: ![[MTYPE]] = !DISubroutineType({{.*}}types: !{{.*}}) c.m(); } From c5a1eb9b0a76eef7e3025b7333a0d256b8562360 Mon Sep 17 00:00:00 2001 From: Piotr Sobczak Date: Wed, 27 Jan 2021 16:02:49 +0100 Subject: [PATCH 019/244] [AMDGPU] Avoid an illegal operand in si-shrink-instructions Before the patch it was possible to trigger a constant bus violation when folding immediates into a shrunk instruction. The patch adds a check to enforce the legality of the new operand. 
Differential Revision: https://reviews.llvm.org/D95527 (cherry picked from commit fc8e7411218c846386650cfba111b62827c71da0) --- .../Target/AMDGPU/SIShrinkInstructions.cpp | 24 ++++++++++--------- .../shrink-instructions-illegal-fold.mir | 23 ++++++++++++++++++ 2 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 2628070f219c..cdb78aae1c4f 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -75,17 +75,19 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineOperand &MovSrc = Def->getOperand(1); bool ConstantFolded = false; - if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || - isUInt<32>(MovSrc.getImm()))) { - Src0.ChangeToImmediate(MovSrc.getImm()); - ConstantFolded = true; - } else if (MovSrc.isFI()) { - Src0.ChangeToFrameIndex(MovSrc.getIndex()); - ConstantFolded = true; - } else if (MovSrc.isGlobal()) { - Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(), - MovSrc.getTargetFlags()); - ConstantFolded = true; + if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) { + if (MovSrc.isImm() && + (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) { + Src0.ChangeToImmediate(MovSrc.getImm()); + ConstantFolded = true; + } else if (MovSrc.isFI()) { + Src0.ChangeToFrameIndex(MovSrc.getIndex()); + ConstantFolded = true; + } else if (MovSrc.isGlobal()) { + Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(), + MovSrc.getTargetFlags()); + ConstantFolded = true; + } } if (ConstantFolded) { diff --git a/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir b/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir new file mode 100644 index 000000000000..7889f437facf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir @@ -0,0 +1,23 @@ +# RUN: 
llc -march=amdgcn -mcpu=gfx900 -run-pass=si-shrink-instructions --verify-machineinstrs %s -o - | FileCheck %s + +# Make sure immediate folding into V_CNDMASK respects constant bus restrictions. +--- + +name: shrink_cndmask_illegal_imm_folding +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: shrink_cndmask_illegal_imm_folding + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32768, implicit $exec + ; CHECK: V_CMP_EQ_U32_e32 0, [[COPY]], implicit-def $vcc, implicit $exec + ; CHECK: V_CNDMASK_B32_e32 [[MOV]], killed [[COPY]], implicit $vcc, implicit $exec + + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = V_MOV_B32_e32 32768, implicit $exec + V_CMP_EQ_U32_e32 0, %0:vgpr_32, implicit-def $vcc, implicit $exec + %2:vgpr_32 = V_CNDMASK_B32_e64 0, %1:vgpr_32, 0, killed %0:vgpr_32, $vcc, implicit $exec + S_NOP 0 + +... From b2710e7535bd43d9fd6f9792644fe2c207079c42 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 28 Jan 2021 23:53:45 +0100 Subject: [PATCH 020/244] [sanitizer] Fix msan test build on FreeBSD after 7afdc89c2054 This commit accidentally enabled fgetgrent_r() in the msan tests under FreeBSD, but this function is not supported. Also remove FreeBSD from the SANITIZER_INTERCEPT_FGETGRENT_R macro. 
(cherry picked from commit e056fc6cb676f72d5b7dfe7ca540b3275bd1a46f) --- compiler-rt/lib/msan/tests/msan_test.cpp | 2 ++ .../lib/sanitizer_common/sanitizer_platform_interceptors.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 7378b237a711..5dc9090f36c0 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -3707,7 +3707,9 @@ TEST(MemorySanitizer, getgrent_r) { EXPECT_NOT_POISONED(grp.gr_gid); EXPECT_NOT_POISONED(grpres); } +#endif +#ifdef __GLIBC__ TEST(MemorySanitizer, fgetgrent_r) { FILE *fp = fopen("/etc/group", "r"); struct group grp; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 7f7b38d4215b..068fc9829e57 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -226,7 +226,7 @@ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) #define SANITIZER_INTERCEPT_GETPWENT \ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) -#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_FREEBSD || SI_GLIBC || SI_SOLARIS) +#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_GLIBC || SI_SOLARIS) #define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID || SI_SOLARIS #define SANITIZER_INTERCEPT_GETPWENT_R \ (SI_FREEBSD || SI_NETBSD || SI_GLIBC || SI_SOLARIS) From 4e20d9c03d9acc9ee5a78cbba82b08d51ecbaf3f Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Thu, 28 Jan 2021 19:01:41 -0800 Subject: [PATCH 021/244] Make the profile-filter.c test compatible with 32-bit systems This addresses PR48930. 
Differential Revision: https://reviews.llvm.org/D95658 (cherry picked from commit 0217f1c7a31ba44715bc083a60cddc2192ffed96) --- clang/test/CodeGen/profile-filter.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/test/CodeGen/profile-filter.c b/clang/test/CodeGen/profile-filter.c index 5415ff96cb14..dc5a31e872a1 100644 --- a/clang/test/CodeGen/profile-filter.c +++ b/clang/test/CodeGen/profile-filter.c @@ -28,11 +28,11 @@ unsigned i; // EXCLUDE: noprofile // EXCLUDE: @test1 unsigned test1() { - // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 - // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8 + // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) + // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0) return i + 1; } @@ -47,10 +47,10 @@ unsigned test1() { // EXCLUDE-NOT: noprofile // EXCLUDE: @test2 unsigned test2() { - // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 
8 - // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 - // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 - // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 - // EXCLUDE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8 + // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) + // EXCLUDE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0) return i - 1; } From 07f8d437134c0b229104241a621db05013da0049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Fri, 29 Jan 2021 02:14:47 +0100 Subject: [PATCH 022/244] [clang-tidy] Fix linking tests to LLVMTestingSupport LLVMTestingSupport is not part of libLLVM, and therefore can not be linked to via LLVM_LINK_COMPONENTS. Instead, it needs to be specified explicitly to ensure that it is linked explicitly even if LLVM_LINK_LLVM_DYLIB is used. This is consistent with handling in clangd. 
Fixes PR#48931 Differential Revision: https://reviews.llvm.org/D95653 (cherry picked from commit 632545e8ce846ccaeca8df15a3dc5e36d01a1275) --- clang-tools-extra/unittests/clang-tidy/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt index be35b71d15cf..05d330dd8033 100644 --- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt @@ -1,7 +1,6 @@ set(LLVM_LINK_COMPONENTS FrontendOpenMP Support - TestingSupport ) get_filename_component(CLANG_LINT_SOURCE_DIR @@ -46,4 +45,5 @@ target_link_libraries(ClangTidyTests clangTidyObjCModule clangTidyReadabilityModule clangTidyUtils + LLVMTestingSupport ) From f54cf61ad8e1cc6592074ddd7ad07908623ead6b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 27 Jan 2021 17:06:05 -0500 Subject: [PATCH 023/244] [OpenMP][NVPTX] Disable building NVPTX deviceRTL by default on a non-CUDA system D95466 dropped CUDA to build NVPTX deviceRTL and enabled it by default. However, the building requires some libraries that are not available on non-CUDA system by default, which could break the compilation. This patch disabled the build by default. It can be enabled with `LIBOMPTARGET_BUILD_NVPTX_BCLIB=ON`. 
Reviewed By: kparzysz Differential Revision: https://reviews.llvm.org/D95556 (cherry picked from commit fb12df4a8e33d759938057718273dfb434b2d9c4) --- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 4661bf08af1c..23efbba29d66 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -10,6 +10,15 @@ # ##===----------------------------------------------------------------------===## +# By default we will not build NVPTX deviceRTL on a non-CUDA +set(LIBOMPTARGET_BUILD_NVPTX_BCLIB FALSE CACHE BOOL + "Whether build NVPTX deviceRTL on non-CUDA system.") + +if (NOT (LIBOMPTARGET_DEP_CUDA_FOUND OR LIBOMPTARGET_BUILD_NVPTX_BCLIB)) + libomptarget_say("Not building NVPTX deviceRTL by default on non-CUDA system.") + return() +endif() + # Check if we can create an LLVM bitcode implementation of the runtime library # that could be inlined in the user application. For that we need to find # a Clang compiler capable of compiling our CUDA files to LLVM bitcode and From 07dc51637cc419cbd61383eb4e26713a8f931806 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 30 Jan 2021 13:30:48 +0000 Subject: [PATCH 024/244] [LoopUnswitch] Properly update MSSA if header has non-clobbering stores. This patch fixes updating MemorySSA if the header contains memory defs that do not clobber a duplicated instruction. We need to find the first defining access outside the loop body and use that as defining access of the duplicated instruction. This fixes a crash caused by bee486851c1a. 
(Cherry-picked on the 12.x release branch from 10c57268c074c3ad48f76da38fa2ba575ee3d1f9) --- llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 10 ++- .../partial-unswitch-update-memoryssa.ll | 76 +++++++++++++++++++ .../LoopUnswitch/partial-unswitch.ll | 36 --------- 3 files changed, 83 insertions(+), 39 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 18717394d384..822a786fc7c7 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1114,12 +1114,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition( Loop *L = LI->getLoopFor(I->getParent()); auto *DefiningAccess = MemA->getDefiningAccess(); - // If the defining access is a MemoryPhi in the header, get the incoming - // value for the pre-header as defining access. - if (DefiningAccess->getBlock() == I->getParent()) { + // Get the first defining access before the loop. + while (L->contains(DefiningAccess->getBlock())) { + // If the defining access is a MemoryPhi, get the incoming + // value for the pre-header as defining access. 
if (auto *MemPhi = dyn_cast(DefiningAccess)) { DefiningAccess = MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); + } else { + DefiningAccess = + cast(DefiningAccess)->getDefiningAccess(); } } MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll new file mode 100644 index 000000000000..ec1e8eeeb070 --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll @@ -0,0 +1,76 @@ +; RUN: opt -loop-unswitch -verify-dom-info -verify-memoryssa -S -enable-new-pm=0 %s | FileCheck %s +; RUN: opt -loop-unswitch -memssa-check-limit=3 -verify-dom-info -verify-memoryssa -S -enable-new-pm=0 %s | FileCheck %s + +declare void @clobber() + +; Check that MemorySSA updating can deal with a clobbering access of a +; duplicated load being a MemoryPHI outside the loop. +define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) { +; CHECK-LABEL: @partial_unswitch_memssa_update( +; CHECK-LABEL: loop.ph: +; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4 +; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0 +; CHECK-NEXT: br i1 [[C]] +entry: + br i1 %c, label %loop.ph, label %outside.clobber + +outside.clobber: + call void @clobber() + br label %loop.ph + +loop.ph: + br label %loop.header + +loop.header: + %lv = load i32, i32* %ptr, align 4 + %hc = icmp eq i32 %lv, 0 + br i1 %hc, label %if, label %then + +if: + br label %loop.latch + +then: + br label %loop.latch + +loop.latch: + br i1 true, label %loop.header, label %exit + +exit: + ret void +} + +; Check that MemorySSA updating can deal with skipping defining accesses in the +; loop body until it finds the first defining access outside the loop. 
+define void @partial_unswitch_inloop_stores_beteween_outside_defining_access(i64* noalias %ptr, i16* noalias %src) { +; CHECK-LABEL: @partial_unswitch_inloop_stores_beteween_outside_defining_access +; CHECK-LABEL: entry: +; CHECK-NEXT: store i64 0, i64* %ptr, align 1 +; CHECK-NEXT: store i64 1, i64* %ptr, align 1 +; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i16, i16* %src, align 1 +; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i16 [[LV]], 0 +; CHECK-NEXT: br i1 [[C]] +; +entry: + store i64 0, i64* %ptr, align 1 + store i64 1, i64* %ptr, align 1 + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + store i64 2, i64* %ptr, align 1 + %lv = load i16, i16* %src, align 1 + %invar.cond = icmp eq i16 %lv, 0 + br i1 %invar.cond, label %noclobber, label %loop.latch + +noclobber: + br label %loop.latch + +loop.latch: + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, 1000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll index 9f0e5d6f6c35..96a6b0f4e2b5 100644 --- a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll +++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll @@ -575,42 +575,6 @@ exit: ret i32 10 } -; Check that MemorySSA updating can deal with a clobbering access of a -; duplicated load being a MemoryPHI outside the loop. 
-define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) { -; CHECK-LABEL: @partial_unswitch_memssa_update( -; CHECK-LABEL: loop.ph: -; CHECK-NEXT: [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4 -; CHECK-NEXT: [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0 -; CHECK-NEXT: br i1 [[C]] -entry: - br i1 %c, label %loop.ph, label %outside.clobber - -outside.clobber: - call void @clobber() - br label %loop.ph - -loop.ph: - br label %loop.header - -loop.header: - %lv = load i32, i32* %ptr, align 4 - %hc = icmp eq i32 %lv, 0 - br i1 %hc, label %if, label %then - -if: - br label %loop.latch - -then: - br label %loop.latch - -loop.latch: - br i1 true, label %loop.header, label %exit - -exit: - ret void -} - ; Make sure the duplicated instructions are moved to a preheader that always ; executes when the loop body also executes. Do not check the unswitched code, ; because it is already checked in the @partial_unswitch_true_successor test From c5fd87eaddaad87b28530e5272b7cf0c788dc1f9 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 27 Jan 2021 03:09:20 +0000 Subject: [PATCH 025/244] workflows: Fix LLVM ABI checks to work for X.0.0 releases --- .github/workflows/llvm-tests.yml | 84 +++++++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 12 deletions(-) diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 675383407d64..1cffc3ef4d97 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -1,8 +1,5 @@ name: LLVM Tests -env: - release_major: 12 - on: push: branches: @@ -42,7 +39,38 @@ jobs: with: cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release + abi-dump-setup: + runs-on: ubuntu-latest + outputs: + BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} + ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} + BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} + LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + LLVM_VERSION_MINOR: ${{ 
steps.version.outputs.LLVM_VERSION_MINOR }} + LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} + steps: + - name: Checkout source + uses: actions/checkout@v1 + with: + fetch-depth: 1 + + - name: Get LLVM version + id: version + uses: tstellar/actions/get-llvm-version@get-version + + - name: Setup Variables + id: vars + run: | + if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then + echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + echo ::set-output name=ABI_HEADERS::llvm-c + else + echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + echo ::set-output name=ABI_HEADERS::. + fi + abi-dump: + needs: abi-dump-setup runs-on: ubuntu-latest strategy: matrix: @@ -51,11 +79,11 @@ jobs: - build-latest include: - name: build-baseline - # FIXME: Referencing the env context does not work here - # ref: llvmorg-${{ env.release_major }}.0.0 - ref: llvmorg-12.0.0 + llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} + ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 repo: llvm/llvm-project - name: build-latest + llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} ref: ${{ github.sha }} repo: ${{ github.repository }} steps: @@ -78,22 +106,44 @@ jobs: repo: ${{ matrix.repo }} - name: Configure run: | - mkdir build - cd build - cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" ../llvm + mkdir install + cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm - name: Build - run: ninja -C build libLLVM-${{ env.release_major }}.so + # Need to run install-LLVM twice to 
ensure the symlink is installed (this is a bug). + run: | + ninja -C build install-LLVM + ninja -C build install-LLVM + ninja -C build install-llvm-headers - name: Dump ABI - run: abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers llvm/include -o ${{ matrix.ref }}.abi.tar.gz build/lib/libLLVM-${{ env.release_major }}.so + run: | + if [ "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" ]; then + nm ./install/lib/libLLVM.so | awk "/T _LLVM/ || /T LLVM/ { print $3 }" | sort -u | sed -e "s/^_//g" | cut -d ' ' -f 3 > llvm.symbols + # Even though the -symbols-list option doesn't seem to filter out the symbols, I believe it speeds up processing, so I'm leaving it in. + export EXTRA_ARGS="-symbols-list llvm.symbols" + else + touch llvm.symbols + fi + abi-dumper $EXTRA_ARGS -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o ${{ matrix.ref }}.abi ./install/lib/libLLVM.so + # Remove symbol versioning from dumps, so we can compare across major versions. 
+ sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi + tar -czf ${{ matrix.ref }}.abi.tar.gz ${{ matrix.ref }}.abi - name: Upload ABI file uses: actions/upload-artifact@v1 with: name: ${{ matrix.name }} path: ${{ matrix.ref }}.abi.tar.gz + - name: Upload symbol list file + if: matrix.name == 'build-baseline' + uses: actions/upload-artifact@v1 + with: + name: symbol-list + path: llvm.symbols + abi-compare: runs-on: ubuntu-latest needs: + - abi-dump-setup - abi-dump steps: - name: Download baseline @@ -104,10 +154,20 @@ jobs: uses: actions/download-artifact@v1 with: name: build-latest + - name: Download symbol list + uses: actions/download-artifact@v1 + with: + name: symbol-list + - name: Install abi-compliance-checker run: sudo apt-get install abi-compliance-checker - name: Compare ABI - run: abi-compliance-checker -l libLLVM-${{ env.release_major}}.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz + run: | + if [ -s symbol-list/llvm.symbols ]; then + # This option doesn't seem to work with the ABI dumper, so passing it here. + export EXTRA_ARGS="-symbols-list symbol-list/llvm.symbols" + fi + abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" - name: Upload ABI Comparison if: always() uses: actions/upload-artifact@v1 From b6d2402e319be00592908b2c9cb63fccdb481008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 2 Feb 2021 15:08:17 +0200 Subject: [PATCH 026/244] [docs] Add release notes for things I've done for the 12.x release branch. 
--- clang/docs/ReleaseNotes.rst | 3 +++ lld/docs/ReleaseNotes.rst | 20 ++++++++++++++++++-- llvm/docs/ReleaseNotes.rst | 19 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3001d6feb631..a34cd512ca59 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -150,6 +150,9 @@ Attribute Changes in Clang Windows Support --------------- +- Implicitly add ``.exe`` suffix for MinGW targets, even when cross compiling. + (This matches a change from GCC 8.) + C Language Changes in Clang --------------------------- diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index e0b17ca3e030..ea1403888eba 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -35,12 +35,28 @@ Breaking changes COFF Improvements ----------------- -* ... +* Error out clearly if creating a DLL with too many exported symbols. + (`D86701 <https://reviews.llvm.org/D86701>`_) MinGW Improvements ------------------ -* ... +* Enabled dynamicbase by default. (`D86654 <https://reviews.llvm.org/D86654>`_) + +* Tolerate mismatches between COMDAT section sizes with different amount of + padding (produced by binutils) by inspecting the aux section definition. + (`D86659 <https://reviews.llvm.org/D86659>`_) + +* Support setting the subsystem version via the subsystem argument. + (`D88804 <https://reviews.llvm.org/D88804>`_) + +* Implemented the GNU -wrap option. + (`D89004 <https://reviews.llvm.org/D89004>`_, + `D91689 <https://reviews.llvm.org/D91689>`_) + +* Handle the ``--demangle`` and ``--no-demangle`` options. + (`D93950 <https://reviews.llvm.org/D93950>`_) + MachO Improvements ------------------ diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index de8431fe3908..f2eb53778406 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -92,6 +92,25 @@ Changes to TableGen uses the "`...`" range punctuation (e.g., ``{0...9}``). The hyphen syntax is deprecated. +Changes to the AArch64 Backend +------------------------------ + +During this release ...
+ +* Lots of improvements to generation of Windows unwind data; the unwind + data is optimized and written in packed form where possible, reducing + the size of unwind data (pdata and xdata sections) by around 60% + compared with LLVM 11. The generation of prologs/epilogs is tweaked + when targeting Windows, to increase the chances of being able to use + the packed unwind info format. + +* Support for creating Windows unwind data using ``.seh_*`` assembler + directives. + +* Produce proper assembly output for the Windows target, including + ``:lo12:`` relocation specifiers, to allow the assembly output + to actually be assembled. + Changes to the ARM Backend -------------------------- From 0db882a0f59afcd7f76d716ca2e04f2d6d92aa03 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 1 Feb 2021 10:48:29 -0800 Subject: [PATCH 027/244] workflows: Fix libclc tests --- .github/workflows/libclc-tests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml index 2f1eb2939ea2..188eecfc3b89 100644 --- a/.github/workflows/libclc-tests.yml +++ b/.github/workflows/libclc-tests.yml @@ -45,9 +45,9 @@ jobs: cmake_args: -G Ninja -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release build_target: "" - name: Build and test libclc + # spirv targets require llvm-spirv, so skip building them until we figure out + # how to install this tool. 
run: | - mkdir libclc-build - cd libclc-build - cmake -G Ninja ../libclc -DLLVM_CONFIG=../build/bin/llvm-config - ninja - ninja test + cmake -G Ninja -S libclc -B libclc-build -DLLVM_CONFIG=`pwd`/build/bin/llvm-config -DLIBCLC_TARGETS_TO_BUILD="amdgcn--;amdgcn--amdhsa;r600--;nvptx--;nvptx64--;nvptx--nvidiacl;nvptx64--nvidiacl" + ninja -C libclc-build + ninja -C libclc-build test From c0097c784179e6f927ed8ae6b28796faee2fea61 Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Sun, 31 Jan 2021 19:18:41 -0500 Subject: [PATCH 028/244] [OpenMP][Libomptarget] Remove possible harmful copy constructor call for RTLsTy From https://bugs.llvm.org/show_bug.cgi?id=48973, we know that `std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs)` causes compile time problems in libstdc++v3 5.3.1. This is because there was a defect in the standard regarding the `call_once` (LWG 2442). This was fixed in libstdc++ soon thereafter, but there are likely other standard libraries where this will fail. By matching this function call with the other one, we fix this bug. 
Differential Revision: https://reviews.llvm.org/D95769 --- openmp/libomptarget/src/interface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 239570935cb2..cf6d36960c75 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -94,7 +94,7 @@ EXTERN void __tgt_register_requires(int64_t flags) { /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { TIMESCOPE(); - std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs); + std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); for (auto &RTL : PM->RTLs.AllRTLs) { if (RTL.register_lib) { if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) { From 162642bec0df760b27e66cfff046b40f1dfd2713 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 4 Feb 2021 00:07:04 +0900 Subject: [PATCH 029/244] Revert "[ConstantFold] Fold more operations to poison" This reverts commit 53040a968dc2ff20931661e55f05da2ef8b964a0 due to its bad interaction with select i1 -> and/or i1 transformation. 
This fixes: https://bugs.llvm.org/show_bug.cgi?id=49005 https://bugs.llvm.org/show_bug.cgi?id=48435 (cherry picked from commit 06829034ca64b8c83a5b20d8abe5ddbfe7af0004) --- clang/test/Frontend/fixed_point_unary.c | 4 +- llvm/lib/IR/ConstantFold.cpp | 59 ++++++------- ...amdgpu-codegenprepare-fold-binop-select.ll | 2 +- .../Transforms/InstCombine/apint-shift.ll | 2 +- .../canonicalize-ashr-shl-to-masking.ll | 2 +- .../canonicalize-lshr-shl-to-masking.ll | 2 +- .../canonicalize-shl-lshr-to-masking.ll | 2 +- llvm/test/Transforms/InstCombine/icmp.ll | 4 +- ...nput-masking-after-truncation-variant-a.ll | 4 +- ...nput-masking-after-truncation-variant-b.ll | 4 +- ...nput-masking-after-truncation-variant-c.ll | 4 +- ...nput-masking-after-truncation-variant-d.ll | 4 +- ...nput-masking-after-truncation-variant-e.ll | 4 +- ...dant-left-shift-input-masking-variant-a.ll | 4 +- ...dant-left-shift-input-masking-variant-b.ll | 4 +- ...dant-left-shift-input-masking-variant-c.ll | 4 +- ...dant-left-shift-input-masking-variant-d.ll | 4 +- ...dant-left-shift-input-masking-variant-e.ll | 4 +- .../InstCombine/select-of-bittest.ll | 6 +- .../InstCombine/shift-add-inseltpoison.ll | 12 +-- llvm/test/Transforms/InstCombine/shift-add.ll | 12 +-- .../ConstProp/InsertElement-inseltpoison.ll | 2 +- .../InstSimplify/ConstProp/InsertElement.ll | 2 +- .../Transforms/InstSimplify/ConstProp/cast.ll | 4 +- .../InstSimplify/ConstProp/poison.ll | 4 +- .../InstSimplify/ConstProp/shift.ll | 24 ++--- .../vector-undef-elts-inseltpoison.ll | 2 +- .../ConstProp/vector-undef-elts.ll | 2 +- .../ConstProp/vscale-inseltpoison.ll | 16 ++-- .../InstSimplify/ConstProp/vscale.ll | 16 ++-- llvm/test/Transforms/InstSimplify/div.ll | 39 +-------- llvm/test/Transforms/InstSimplify/rem.ll | 31 +------ llvm/test/Transforms/InstSimplify/undef.ll | 87 +++++++++---------- llvm/test/Transforms/SROA/phi-gep.ll | 2 +- llvm/test/Transforms/SROA/select-gep.ll | 2 +- .../X86/insert-binop-inseltpoison.ll | 4 +- 
.../X86/insert-binop-with-constant.ll | 42 ++++----- .../VectorCombine/X86/insert-binop.ll | 6 +- llvm/unittests/IR/ConstantsTest.cpp | 25 +++--- 39 files changed, 199 insertions(+), 258 deletions(-) diff --git a/clang/test/Frontend/fixed_point_unary.c b/clang/test/Frontend/fixed_point_unary.c index 6ce760daba11..849e38a94bc4 100644 --- a/clang/test/Frontend/fixed_point_unary.c +++ b/clang/test/Frontend/fixed_point_unary.c @@ -90,7 +90,7 @@ void inc_usa() { // SIGNED-LABEL: @inc_uf( // SIGNED-NEXT: entry: // SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2 -// SIGNED-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], poison +// SIGNED-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], undef // SIGNED-NEXT: store i16 [[TMP1]], i16* @uf, align 2 // SIGNED-NEXT: ret void // @@ -271,7 +271,7 @@ void dec_usa() { // SIGNED-LABEL: @dec_uf( // SIGNED-NEXT: entry: // SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2 -// SIGNED-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], poison +// SIGNED-NEXT: [[TMP1:%.*]] = sub i16 [[TMP0]], undef // SIGNED-NEXT: store i16 [[TMP1]], i16* @uf, align 2 // SIGNED-NEXT: ret void // diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 03cb108cc485..95dd55237e5f 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -630,7 +630,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) { // Undefined behavior invoked - the destination type can't represent // the input constant. 
- return PoisonValue::get(DestTy); + return UndefValue::get(DestTy); } return ConstantInt::get(FPC->getContext(), IntVal); } @@ -916,7 +916,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, unsigned NumElts = ValTy->getNumElements(); if (CIdx->uge(NumElts)) - return PoisonValue::get(Val->getType()); + return UndefValue::get(Val->getType()); SmallVector Result; Result.reserve(NumElts); @@ -1151,21 +1151,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, } case Instruction::SDiv: case Instruction::UDiv: - // X / undef -> poison - // X / 0 -> poison - if (match(C2, m_CombineOr(m_Undef(), m_Zero()))) - return PoisonValue::get(C2->getType()); + // X / undef -> undef + if (isa(C2)) + return C2; + // undef / 0 -> undef // undef / 1 -> undef - if (match(C2, m_One())) + if (match(C2, m_Zero()) || match(C2, m_One())) return C1; // undef / X -> 0 otherwise return Constant::getNullValue(C1->getType()); case Instruction::URem: case Instruction::SRem: - // X % undef -> poison - // X % 0 -> poison - if (match(C2, m_CombineOr(m_Undef(), m_Zero()))) - return PoisonValue::get(C2->getType()); + // X % undef -> undef + if (match(C2, m_Undef())) + return C2; + // undef % 0 -> undef + if (match(C2, m_Zero())) + return C1; // undef % X -> 0 otherwise return Constant::getNullValue(C1->getType()); case Instruction::Or: // X | undef -> -1 @@ -1173,28 +1175,28 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, return C1; return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0 case Instruction::LShr: - // X >>l undef -> poison + // X >>l undef -> undef if (isa(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef >>l 0 -> undef if (match(C2, m_Zero())) return C1; // undef >>l X -> 0 return Constant::getNullValue(C1->getType()); case Instruction::AShr: - // X >>a undef -> poison + // X >>a undef -> undef if (isa(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef >>a 
0 -> undef if (match(C2, m_Zero())) return C1; - // TODO: undef >>a X -> poison if the shift is exact + // TODO: undef >>a X -> undef if the shift is exact // undef >>a X -> 0 return Constant::getNullValue(C1->getType()); case Instruction::Shl: // X << undef -> undef if (isa(C2)) - return PoisonValue::get(C2->getType()); + return C2; // undef << 0 -> undef if (match(C2, m_Zero())) return C1; @@ -1247,14 +1249,14 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, if (CI2->isOne()) return C1; // X / 1 == X if (CI2->isZero()) - return PoisonValue::get(CI2->getType()); // X / 0 == poison + return UndefValue::get(CI2->getType()); // X / 0 == undef break; case Instruction::URem: case Instruction::SRem: if (CI2->isOne()) return Constant::getNullValue(CI2->getType()); // X % 1 == 0 if (CI2->isZero()) - return PoisonValue::get(CI2->getType()); // X % 0 == poison + return UndefValue::get(CI2->getType()); // X % 0 == undef break; case Instruction::And: if (CI2->isZero()) return C2; // X & 0 == 0 @@ -1368,7 +1370,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::SDiv: assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) - return PoisonValue::get(CI1->getType()); // MIN_INT / -1 -> poison + return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: assert(!CI2->isZero() && "Div by zero handled above"); @@ -1376,7 +1378,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::SRem: assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) - return PoisonValue::get(CI1->getType()); // MIN_INT % -1 -> poison + return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); case Instruction::And: return 
ConstantInt::get(CI1->getContext(), C1V & C2V); @@ -1387,15 +1389,15 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, case Instruction::Shl: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.shl(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef case Instruction::LShr: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef case Instruction::AShr: if (C2V.ult(C1V.getBitWidth())) return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V)); - return PoisonValue::get(C1->getType()); // too big shift is poison + return UndefValue::get(C1->getType()); // too big shift is undef } } @@ -1441,7 +1443,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, // Fast path for splatted constants. if (Constant *C2Splat = C2->getSplatValue()) { if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue()) - return PoisonValue::get(VTy); + return UndefValue::get(VTy); if (Constant *C1Splat = C1->getSplatValue()) { return ConstantVector::getSplat( VTy->getElementCount(), @@ -1458,9 +1460,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx); Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx); - // If any element of a divisor vector is zero, the whole op is poison. + // If any element of a divisor vector is zero, the whole op is undef. 
if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue()) - return PoisonValue::get(VTy); + return UndefValue::get(VTy); Result.push_back(ConstantExpr::get(Opcode, LHS, RHS)); } @@ -2343,8 +2345,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, return PoisonValue::get(GEPTy); if (isa(C)) - // If inbounds, we can choose an out-of-bounds pointer as a base pointer. - return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy); + return UndefValue::get(GEPTy); Constant *Idx0 = cast(Idxs[0]); if (Idxs.size() == 1 && (Idx0->isNullValue() || isa(Idx0))) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll index e0037f0d8e45..bfe83c7a1285 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll @@ -42,7 +42,7 @@ define i32 @select_sdiv_rhs_const_i32(i1 %cond) { define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) { ; IR-LABEL: @select_sdiv_lhs_const_v2i32( -; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> , <2 x i32> +; IR-NEXT: [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> , <2 x i32> ; IR-NEXT: ret <2 x i32> [[OP]] ; ; GCN-LABEL: select_sdiv_lhs_const_v2i32: diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll index 908aeac0cea2..5a351efccfcc 100644 --- a/llvm/test/Transforms/InstCombine/apint-shift.ll +++ b/llvm/test/Transforms/InstCombine/apint-shift.ll @@ -337,7 +337,7 @@ define <2 x i1> @test16vec_nonuniform(<2 x i84> %X) { define <2 x i1> @test16vec_undef(<2 x i84> %X) { ; CHECK-LABEL: @test16vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i84> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; diff --git 
a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll index 8d29372c3a72..ba0d32ee3768 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll @@ -418,7 +418,7 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = ashr <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll index 40bc4aaab21c..445f6406b3d2 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll @@ -418,7 +418,7 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) { define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = and <3 x i8> [[X:%.*]], ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %tmp0 = lshr <3 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll index 45aa22aa808f..9de0b337de28 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll @@ -171,7 +171,7 @@ define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) { define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) { ; CHECK-LABEL: @positive_sameconst_vec_undef2( -; CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], +; 
CHECK-NEXT: [[RET:%.*]] = and <3 x i32> [[X:%.*]], ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %tmp0 = shl <3 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index b48466e678d8..5e6bed4e280f 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -2876,7 +2876,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform(<2 x i32> %x) { define <2 x i1> @icmp_and_or_lshr_cst_vec_undef(<2 x i32> %x) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; @@ -2920,7 +2920,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) { define <2 x i1> @icmp_and_or_lshr_cst_vec_undef_commute(<2 x i32> %xp) { ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef_commute( ; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll index e49c381fcd16..89c16a0949e8 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll @@ -103,7 +103,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl 
<8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -138,7 +138,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll index 20f38deeb0d5..8aef637c6a74 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -103,7 +103,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> %nbits, @@ -138,7 +138,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]] -; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T7]] ; %t0 = add <8 x i32> 
%nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll index 562280391c5e..61f25e6ca0b1 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -83,7 +83,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -110,7 +110,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll index aa644e6264e4..077bb8296f3e 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -93,7 +93,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; 
CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]] -; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -124,7 +124,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]] -; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll index f2aa2894e27a..961ea5e48416 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll @@ -83,7 +83,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> @@ -110,7 +110,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> 
[[TMP1]], [[T2]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll index 882117fe3480..41a71aa98f40 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll @@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -109,7 +109,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll index e92875d79207..787135229148 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll @@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; 
CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, @@ -109,7 +109,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]] -; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = add <8 x i32> %nbits, diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll index b8066cef2b40..c0959d9e1ac6 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll @@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = lshr <8 x i32> , %nbits @@ -81,7 +81,7 @@ define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; 
%t0 = lshr <8 x i32> , %nbits diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index 20b322c0b647..5e0f0be2b1ad 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -72,7 +72,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]] -; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T4]] ; %t0 = shl <8 x i32> , %nbits @@ -95,7 +95,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]] -; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T4]] ; %t0 = shl <8 x i32> , %nbits diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll index 46f5b0c2f213..2e335f0083c1 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll @@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]] -; CHECK-NEXT: 
[[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = shl <8 x i32> %x, %nbits @@ -81,7 +81,7 @@ define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]] -; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], +; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], ; CHECK-NEXT: ret <8 x i32> [[T3]] ; %t0 = shl <8 x i32> %x, %nbits diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll index c85bcba82e97..d9bef00b2f78 100644 --- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll +++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll @@ -82,7 +82,7 @@ define <2 x i32> @and_lshr_and_vec_v2(<2 x i32> %arg) { define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) { ; CHECK-LABEL: @and_lshr_and_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[TMP4]] @@ -91,7 +91,6 @@ define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) { %tmp1 = icmp eq <3 x i32> %tmp, %tmp2 = lshr <3 x i32> %arg, %tmp3 = and <3 x i32> %tmp2, - ; The second element of %tmp4 is poison because it is (undef ? poison : undef). 
%tmp4 = select <3 x i1> %tmp1, <3 x i32> %tmp3, <3 x i32> ret <3 x i32> %tmp4 } @@ -223,7 +222,7 @@ define <2 x i32> @f_var0_vec(<2 x i32> %arg, <2 x i32> %arg1) { define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-LABEL: @f_var0_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32> @@ -233,7 +232,6 @@ define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { %tmp2 = icmp eq <3 x i32> %tmp, %tmp3 = lshr <3 x i32> %arg, %tmp4 = and <3 x i32> %tmp3, - ; The second element of %tmp5 is poison because it is (undef ? poison : undef). %tmp5 = select <3 x i1> %tmp2, <3 x i32> %tmp4, <3 x i32> ret <3 x i32> %tmp5 } diff --git a/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll index 3232cdc49c0f..e968f13c40b0 100644 --- a/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll @@ -40,7 +40,7 @@ define i32 @lshr_C1_add_A_C2_i32(i32 %A) { define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = zext <4 x i16> %A to <4 x i32> @@ -52,7 +52,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> 
%A, @@ -64,7 +64,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> %A, @@ -78,7 +78,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -94,7 +94,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -110,7 +110,7 @@ define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll index eea8b7a074d7..e227274f4930 100644 --- a/llvm/test/Transforms/InstCombine/shift-add.ll +++ 
b/llvm/test/Transforms/InstCombine/shift-add.ll @@ -40,7 +40,7 @@ define i32 @lshr_C1_add_A_C2_i32(i32 %A) { define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32> -; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = zext <4 x i16> %A to <4 x i32> @@ -52,7 +52,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> %A, @@ -64,7 +64,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32( ; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] +; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> , [[B]] ; CHECK-NEXT: ret <4 x i32> [[D]] ; %B = and <4 x i32> %A, @@ -78,7 +78,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = shl <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -94,7 +94,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = ashr <4 x 
i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = ashr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 @@ -110,7 +110,7 @@ define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) { ; CHECK-NEXT: [[A:%.*]] = zext i16 [[I:%.*]] to i32 ; CHECK-NEXT: [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0 ; CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] +; CHECK-NEXT: [[E:%.*]] = lshr <4 x i32> , [[C]] ; CHECK-NEXT: ret <4 x i32> [[E]] ; %A = zext i16 %I to i32 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll index 54b862c8514a..197e7cc0ac75 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll @@ -25,7 +25,7 @@ define <4 x i64> @insertelement() { define <4 x i64> @insertelement_undef() { ; CHECK-LABEL: @insertelement_undef( -; CHECK-NEXT: ret <4 x i64> poison +; CHECK-NEXT: ret <4 x i64> undef ; %vec1 = insertelement <4 x i64> poison, i64 -1, i32 0 %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll index 127c1692b5b8..a9a27a5df01f 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll @@ -25,7 +25,7 @@ define <4 x i64> @insertelement() { define <4 x i64> @insertelement_undef() { ; CHECK-LABEL: @insertelement_undef( -; CHECK-NEXT: ret <4 x i64> poison +; CHECK-NEXT: ret <4 x i64> undef ; %vec1 = insertelement <4 x i64> undef, i64 -1, i32 0 %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1 diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll 
b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll index 1136151f7157..adf5e4b68a1b 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll @@ -5,7 +5,7 @@ define i8 @overflow_fptosi() { ; CHECK-LABEL: @overflow_fptosi( -; CHECK-NEXT: ret i8 poison +; CHECK-NEXT: ret i8 undef ; %i = fptosi double 1.56e+02 to i8 ret i8 %i @@ -13,7 +13,7 @@ define i8 @overflow_fptosi() { define i8 @overflow_fptoui() { ; CHECK-LABEL: @overflow_fptoui( -; CHECK-NEXT: ret i8 poison +; CHECK-NEXT: ret i8 undef ; %i = fptoui double 2.56e+02 to i8 ret i8 %i diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll index f3fe29ff57ba..ea34bb4699e6 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll @@ -104,14 +104,14 @@ define void @vec_aggr_ops() { define void @other_ops(i8 %x) { ; CHECK-LABEL: @other_ops( -; CHECK-NEXT: call void (...) @use(i1 poison, i1 poison, i8 poison, i8 poison, i8* poison, i8* poison) +; CHECK-NEXT: call void (...) @use(i1 poison, i1 poison, i8 poison, i8 poison, i8* poison) ; CHECK-NEXT: ret void ; %i1 = icmp eq i8 poison, 1 %i2 = fcmp oeq float poison, 1.0 %i3 = select i1 poison, i8 1, i8 2 %i4 = select i1 true, i8 poison, i8 %x - call void (...) @use(i1 %i1, i1 %i2, i8 %i3, i8 %i4, i8* getelementptr (i8, i8* poison, i64 1), i8* getelementptr inbounds (i8, i8* undef, i64 1)) + call void (...) 
@use(i1 %i1, i1 %i2, i8 %i3, i8 %i4, i8* getelementptr (i8, i8* poison, i64 1)) ret void } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll b/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll index a7a60e562117..3e64513533ff 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll @@ -3,15 +3,15 @@ ; CHECK-LABEL: shift_undef_64 define void @shift_undef_64(i64* %p) { %r1 = lshr i64 -1, 4294967296 ; 2^32 - ; CHECK: store i64 poison + ; CHECK: store i64 undef store i64 %r1, i64* %p %r2 = ashr i64 -1, 4294967297 ; 2^32 + 1 - ; CHECK: store i64 poison + ; CHECK: store i64 undef store i64 %r2, i64* %p %r3 = shl i64 -1, 4294967298 ; 2^32 + 2 - ; CHECK: store i64 poison + ; CHECK: store i64 undef store i64 %r3, i64* %p ret void @@ -20,15 +20,15 @@ define void @shift_undef_64(i64* %p) { ; CHECK-LABEL: shift_undef_65 define void @shift_undef_65(i65* %p) { %r1 = lshr i65 2, 18446744073709551617 - ; CHECK: store i65 poison + ; CHECK: store i65 undef store i65 %r1, i65* %p %r2 = ashr i65 4, 18446744073709551617 - ; CHECK: store i65 poison + ; CHECK: store i65 undef store i65 %r2, i65* %p %r3 = shl i65 1, 18446744073709551617 - ; CHECK: store i65 poison + ; CHECK: store i65 undef store i65 %r3, i65* %p ret void @@ -37,15 +37,15 @@ define void @shift_undef_65(i65* %p) { ; CHECK-LABEL: shift_undef_256 define void @shift_undef_256(i256* %p) { %r1 = lshr i256 2, 18446744073709551617 - ; CHECK: store i256 poison + ; CHECK: store i256 undef store i256 %r1, i256* %p %r2 = ashr i256 4, 18446744073709551618 - ; CHECK: store i256 poison + ; CHECK: store i256 undef store i256 %r2, i256* %p %r3 = shl i256 1, 18446744073709551619 - ; CHECK: store i256 poison + ; CHECK: store i256 undef store i256 %r3, i256* %p ret void @@ -54,15 +54,15 @@ define void @shift_undef_256(i256* %p) { ; CHECK-LABEL: shift_undef_511 define void @shift_undef_511(i511* %p) { %r1 = lshr i511 -1, 1208925819614629174706276 ; 2^80 + 
100 - ; CHECK: store i511 poison + ; CHECK: store i511 undef store i511 %r1, i511* %p %r2 = ashr i511 -2, 1208925819614629174706200 - ; CHECK: store i511 poison + ; CHECK: store i511 undef store i511 %r2, i511* %p %r3 = shl i511 -3, 1208925819614629174706180 - ; CHECK: store i511 poison + ; CHECK: store i511 undef store i511 %r3, i511* %p ret void diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll index 6ce03dd2e0f0..2762291d7954 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll @@ -5,7 +5,7 @@ define <3 x i8> @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> ; %c = shl <3 x i8> undef, ret <3 x i8> %c diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll index 99cc2527d12e..5d0f484bc3fd 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll @@ -5,7 +5,7 @@ define <3 x i8> @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret <3 x i8> +; CHECK-NEXT: ret <3 x i8> ; %c = shl <3 x i8> undef, ret <3 x i8> %c diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll index 9689887be69b..ee19e617748b 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll @@ -75,7 +75,7 @@ define @fmul() { define @udiv() { ; CHECK-LABEL: @udiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = udiv undef, undef ret %r @@ -83,7 +83,7 @@ define @udiv() { define @udiv_splat_zero() { ; CHECK-LABEL: @udiv_splat_zero( -; CHECK-NEXT: ret poison +; 
CHECK-NEXT: ret undef ; %r = udiv zeroinitializer, zeroinitializer ret %r @@ -91,7 +91,7 @@ define @udiv_splat_zero() { define @sdiv() { ; CHECK-LABEL: @sdiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = sdiv undef, undef ret %r @@ -107,7 +107,7 @@ define @fdiv() { define @urem() { ; CHECK-LABEL: @urem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = urem undef, undef ret %r @@ -115,7 +115,7 @@ define @urem() { define @srem() { ; CHECK-LABEL: @srem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = srem undef, undef ret %r @@ -135,7 +135,7 @@ define @frem() { define @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = shl undef, undef ret %r @@ -143,7 +143,7 @@ define @shl() { define @lshr() { ; CHECK-LABEL: @lshr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = lshr undef, undef ret %r @@ -151,7 +151,7 @@ define @lshr() { define @ashr() { ; CHECK-LABEL: @ashr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = ashr undef, undef ret %r diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll index 048e8840ffd8..66e4c93e1968 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll @@ -75,7 +75,7 @@ define @fmul() { define @udiv() { ; CHECK-LABEL: @udiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = udiv undef, undef ret %r @@ -83,7 +83,7 @@ define @udiv() { define @udiv_splat_zero() { ; CHECK-LABEL: @udiv_splat_zero( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = udiv zeroinitializer, zeroinitializer ret %r @@ -91,7 +91,7 @@ define @udiv_splat_zero() { define @sdiv() { ; CHECK-LABEL: @sdiv( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = sdiv undef, undef ret %r @@ -107,7 +107,7 @@ define @fdiv() { define @urem() { ; CHECK-LABEL: @urem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = urem undef, undef ret %r 
@@ -115,7 +115,7 @@ define @urem() { define @srem() { ; CHECK-LABEL: @srem( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = srem undef, undef ret %r @@ -135,7 +135,7 @@ define @frem() { define @shl() { ; CHECK-LABEL: @shl( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = shl undef, undef ret %r @@ -143,7 +143,7 @@ define @shl() { define @lshr() { ; CHECK-LABEL: @lshr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = lshr undef, undef ret %r @@ -151,7 +151,7 @@ define @lshr() { define @ashr() { ; CHECK-LABEL: @ashr( -; CHECK-NEXT: ret poison +; CHECK-NEXT: ret undef ; %r = ashr undef, undef ret %r diff --git a/llvm/test/Transforms/InstSimplify/div.ll b/llvm/test/Transforms/InstSimplify/div.ll index 7c8efc27d3aa..5a3e6e8f7daa 100644 --- a/llvm/test/Transforms/InstSimplify/div.ll +++ b/llvm/test/Transforms/InstSimplify/div.ll @@ -25,11 +25,11 @@ define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) { ret <2 x i32> %B } -; Division-by-zero is poison. UB in any vector lane means the whole op is poison. +; Division-by-zero is undef. UB in any vector lane means the whole op is undef. 
define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @sdiv_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %div = sdiv <2 x i8> , ret <2 x i8> %div @@ -37,7 +37,7 @@ define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) { define <2 x i8> @udiv_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @udiv_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %div = udiv <2 x i8> , ret <2 x i8> %div @@ -193,37 +193,4 @@ define i32 @div1() { ret i32 %urem } -define i8 @sdiv_minusone_divisor() { -; CHECK-LABEL: @sdiv_minusone_divisor( -; CHECK-NEXT: ret i8 poison -; - %v = sdiv i8 -128, -1 - ret i8 %v -} - -define i32 @poison(i32 %x) { -; CHECK-LABEL: @poison( -; CHECK-NEXT: ret i32 poison -; - %v = udiv i32 %x, poison - ret i32 %v -} - -; TODO: this should be poison -define i32 @poison2(i32 %x) { -; CHECK-LABEL: @poison2( -; CHECK-NEXT: ret i32 0 -; - %v = udiv i32 poison, %x - ret i32 %v -} - -define <2 x i32> @poison3(<2 x i32> %x) { -; CHECK-LABEL: @poison3( -; CHECK-NEXT: ret <2 x i32> poison -; - %v = udiv <2 x i32> %x, - ret <2 x i32> %v -} - !0 = !{i32 0, i32 3} diff --git a/llvm/test/Transforms/InstSimplify/rem.ll b/llvm/test/Transforms/InstSimplify/rem.ll index 6aaeb5c70d00..6ccb6474ce44 100644 --- a/llvm/test/Transforms/InstSimplify/rem.ll +++ b/llvm/test/Transforms/InstSimplify/rem.ll @@ -25,11 +25,11 @@ define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) { ret <2 x i32> %B } -; Division-by-zero is poison. UB in any vector lane means the whole op is poison. +; Division-by-zero is undef. UB in any vector lane means the whole op is undef. 
define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @srem_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %rem = srem <2 x i8> , ret <2 x i8> %rem @@ -37,7 +37,7 @@ define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) { define <2 x i8> @urem_zero_elt_vec_constfold(<2 x i8> %x) { ; CHECK-LABEL: @urem_zero_elt_vec_constfold( -; CHECK-NEXT: ret <2 x i8> poison +; CHECK-NEXT: ret <2 x i8> undef ; %rem = urem <2 x i8> , ret <2 x i8> %rem @@ -325,28 +325,3 @@ define <2 x i32> @srem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i32> %y) { ret <2 x i32> %r } -define i8 @srem_minusone_divisor() { -; CHECK-LABEL: @srem_minusone_divisor( -; CHECK-NEXT: ret i8 poison -; - %v = srem i8 -128, -1 - ret i8 %v -} - -define i32 @poison(i32 %x) { -; CHECK-LABEL: @poison( -; CHECK-NEXT: ret i32 poison -; - %v = urem i32 %x, poison - ret i32 %v -} - -; TODO: this should be poison - -define i32 @poison2(i32 %x) { -; CHECK-LABEL: @poison2( -; CHECK-NEXT: ret i32 0 -; - %v = urem i32 poison, %x - ret i32 %v -} diff --git a/llvm/test/Transforms/InstSimplify/undef.ll b/llvm/test/Transforms/InstSimplify/undef.ll index d09dc43da091..fe1f412d3d37 100644 --- a/llvm/test/Transforms/InstSimplify/undef.ll +++ b/llvm/test/Transforms/InstSimplify/undef.ll @@ -1,9 +1,8 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -instsimplify -S < %s | FileCheck %s define i64 @test0() { ; CHECK-LABEL: @test0( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = mul i64 undef, undef ret i64 %r @@ -11,7 +10,7 @@ define i64 @test0() { define i64 @test1() { ; CHECK-LABEL: @test1( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = mul i64 3, undef ret i64 %r @@ -19,7 +18,7 @@ define i64 @test1() { define i64 @test2() { ; CHECK-LABEL: @test2( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = mul i64 undef, 3 ret i64 %r @@ -27,7 +26,7 @@ define i64 @test2() { define i64 @test3() { 
; CHECK-LABEL: @test3( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = mul i64 undef, 6 ret i64 %r @@ -35,7 +34,7 @@ define i64 @test3() { define i64 @test4() { ; CHECK-LABEL: @test4( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = mul i64 6, undef ret i64 %r @@ -43,7 +42,7 @@ define i64 @test4() { define i64 @test5() { ; CHECK-LABEL: @test5( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = and i64 undef, undef ret i64 %r @@ -51,7 +50,7 @@ define i64 @test5() { define i64 @test6() { ; CHECK-LABEL: @test6( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = or i64 undef, undef ret i64 %r @@ -59,7 +58,7 @@ define i64 @test6() { define i64 @test7() { ; CHECK-LABEL: @test7( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = udiv i64 undef, 1 ret i64 %r @@ -67,7 +66,7 @@ define i64 @test7() { define i64 @test8() { ; CHECK-LABEL: @test8( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = sdiv i64 undef, 1 ret i64 %r @@ -75,7 +74,7 @@ define i64 @test8() { define i64 @test9() { ; CHECK-LABEL: @test9( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = urem i64 undef, 1 ret i64 %r @@ -83,7 +82,7 @@ define i64 @test9() { define i64 @test10() { ; CHECK-LABEL: @test10( -; CHECK-NEXT: ret i64 0 +; CHECK: ret i64 0 ; %r = srem i64 undef, 1 ret i64 %r @@ -91,7 +90,7 @@ define i64 @test10() { define i64 @test11() { ; CHECK-LABEL: @test11( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 undef ; %r = shl i64 undef, undef ret i64 %r @@ -99,7 +98,7 @@ define i64 @test11() { define i64 @test11b(i64 %a) { ; CHECK-LABEL: @test11b( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 poison ; %r = shl i64 %a, undef ret i64 %r @@ -107,7 +106,7 @@ define i64 @test11b(i64 %a) { define i64 @test12() { ; CHECK-LABEL: @test12( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 undef ; %r = ashr i64 undef, undef ret i64 %r @@ -115,7 +114,7 @@ define i64 @test12() { define i64 @test12b(i64 %a) { ; CHECK-LABEL: @test12b( -; CHECK-NEXT: ret i64 poison +; CHECK: 
ret i64 poison ; %r = ashr i64 %a, undef ret i64 %r @@ -123,7 +122,7 @@ define i64 @test12b(i64 %a) { define i64 @test13() { ; CHECK-LABEL: @test13( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 undef ; %r = lshr i64 undef, undef ret i64 %r @@ -131,7 +130,7 @@ define i64 @test13() { define i64 @test13b(i64 %a) { ; CHECK-LABEL: @test13b( -; CHECK-NEXT: ret i64 poison +; CHECK: ret i64 poison ; %r = lshr i64 %a, undef ret i64 %r @@ -139,7 +138,7 @@ define i64 @test13b(i64 %a) { define i1 @test14() { ; CHECK-LABEL: @test14( -; CHECK-NEXT: ret i1 undef +; CHECK: ret i1 undef ; %r = icmp slt i64 undef, undef ret i1 %r @@ -147,7 +146,7 @@ define i1 @test14() { define i1 @test15() { ; CHECK-LABEL: @test15( -; CHECK-NEXT: ret i1 undef +; CHECK: ret i1 undef ; %r = icmp ult i64 undef, undef ret i1 %r @@ -155,7 +154,7 @@ define i1 @test15() { define i64 @test16(i64 %a) { ; CHECK-LABEL: @test16( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = select i1 undef, i64 %a, i64 undef ret i64 %r @@ -163,7 +162,7 @@ define i64 @test16(i64 %a) { define i64 @test17(i64 %a) { ; CHECK-LABEL: @test17( -; CHECK-NEXT: ret i64 undef +; CHECK: ret i64 undef ; %r = select i1 undef, i64 undef, i64 %a ret i64 %r @@ -171,7 +170,7 @@ define i64 @test17(i64 %a) { define i64 @test18(i64 %a) { ; CHECK-LABEL: @test18( -; CHECK-NEXT: [[R:%.*]] = call i64 undef(i64 [[A:%.*]]) +; CHECK: [[R:%.*]] = call i64 undef(i64 %a) ; CHECK-NEXT: ret i64 poison ; %r = call i64 (i64) undef(i64 %a) @@ -180,7 +179,7 @@ define i64 @test18(i64 %a) { define <4 x i8> @test19(<4 x i8> %a) { ; CHECK-LABEL: @test19( -; CHECK-NEXT: ret <4 x i8> poison +; CHECK: ret <4 x i8> poison ; %b = shl <4 x i8> %a, ret <4 x i8> %b @@ -188,7 +187,7 @@ define <4 x i8> @test19(<4 x i8> %a) { define i32 @test20(i32 %a) { ; CHECK-LABEL: @test20( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = udiv i32 %a, 0 ret i32 %b @@ -204,7 +203,7 @@ define <2 x i32> @test20vec(<2 x i32> %a) { define i32 @test21(i32 %a) { ; 
CHECK-LABEL: @test21( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = sdiv i32 %a, 0 ret i32 %b @@ -220,7 +219,7 @@ define <2 x i32> @test21vec(<2 x i32> %a) { define i32 @test22(i32 %a) { ; CHECK-LABEL: @test22( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = ashr exact i32 undef, %a ret i32 %b @@ -228,7 +227,7 @@ define i32 @test22(i32 %a) { define i32 @test23(i32 %a) { ; CHECK-LABEL: @test23( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = lshr exact i32 undef, %a ret i32 %b @@ -236,7 +235,7 @@ define i32 @test23(i32 %a) { define i32 @test24() { ; CHECK-LABEL: @test24( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = udiv i32 undef, 0 ret i32 %b @@ -244,7 +243,7 @@ define i32 @test24() { define i32 @test25() { ; CHECK-LABEL: @test25( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = lshr i32 0, undef ret i32 %b @@ -252,7 +251,7 @@ define i32 @test25() { define i32 @test26() { ; CHECK-LABEL: @test26( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = ashr i32 0, undef ret i32 %b @@ -260,7 +259,7 @@ define i32 @test26() { define i32 @test27() { ; CHECK-LABEL: @test27( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = shl i32 0, undef ret i32 %b @@ -268,7 +267,7 @@ define i32 @test27() { define i32 @test28(i32 %a) { ; CHECK-LABEL: @test28( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl nsw i32 undef, %a ret i32 %b @@ -276,7 +275,7 @@ define i32 @test28(i32 %a) { define i32 @test29(i32 %a) { ; CHECK-LABEL: @test29( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl nuw i32 undef, %a ret i32 %b @@ -284,7 +283,7 @@ define i32 @test29(i32 %a) { define i32 @test30(i32 %a) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl nsw nuw i32 undef, %a ret i32 %b @@ -292,7 +291,7 @@ define i32 @test30(i32 %a) { define i32 @test31(i32 %a) { ; CHECK-LABEL: @test31( -; CHECK-NEXT: ret i32 0 +; CHECK: ret i32 0 ; %b = shl i32 undef, 
%a ret i32 %b @@ -300,7 +299,7 @@ define i32 @test31(i32 %a) { define i32 @test32(i32 %a) { ; CHECK-LABEL: @test32( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = shl i32 undef, 0 ret i32 %b @@ -308,7 +307,7 @@ define i32 @test32(i32 %a) { define i32 @test33(i32 %a) { ; CHECK-LABEL: @test33( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = ashr i32 undef, 0 ret i32 %b @@ -316,7 +315,7 @@ define i32 @test33(i32 %a) { define i32 @test34(i32 %a) { ; CHECK-LABEL: @test34( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = lshr i32 undef, 0 ret i32 %b @@ -324,7 +323,7 @@ define i32 @test34(i32 %a) { define i32 @test35(<4 x i32> %V) { ; CHECK-LABEL: @test35( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = extractelement <4 x i32> %V, i32 4 ret i32 %b @@ -332,7 +331,7 @@ define i32 @test35(<4 x i32> %V) { define i32 @test36(i32 %V) { ; CHECK-LABEL: @test36( -; CHECK-NEXT: ret i32 undef +; CHECK: ret i32 undef ; %b = extractelement <4 x i32> undef, i32 %V ret i32 %b @@ -340,7 +339,7 @@ define i32 @test36(i32 %V) { define i32 @test37() { ; CHECK-LABEL: @test37( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = udiv i32 undef, undef ret i32 %b @@ -348,7 +347,7 @@ define i32 @test37() { define i32 @test38(i32 %a) { ; CHECK-LABEL: @test38( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 poison ; %b = udiv i32 %a, undef ret i32 %b @@ -356,7 +355,7 @@ define i32 @test38(i32 %a) { define i32 @test39() { ; CHECK-LABEL: @test39( -; CHECK-NEXT: ret i32 poison +; CHECK: ret i32 undef ; %b = udiv i32 0, undef ret i32 %b diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll index 915ae546beda..6bf2a7718658 100644 --- a/llvm/test/Transforms/SROA/phi-gep.ll +++ b/llvm/test/Transforms/SROA/phi-gep.ll @@ -348,7 +348,7 @@ define void @test_sroa_gep_phi_select_same_block() { ; CHECK-NEXT: [[PHI:%.*]] = phi %pair* [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ] ; CHECK-NEXT: 
[[SELECT]] = select i1 undef, %pair* [[PHI]], %pair* undef ; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[PHI]], i64 1 -; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 undef, %pair* [[PHI_SROA_GEP]], %pair* poison +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 undef, %pair* [[PHI_SROA_GEP]], %pair* undef ; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[WHILE_BODY]] ; CHECK: exit: ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll index f69cfeb410bd..93cb3420d0af 100644 --- a/llvm/test/Transforms/SROA/select-gep.ll +++ b/llvm/test/Transforms/SROA/select-gep.ll @@ -83,7 +83,7 @@ define i32 @test_sroa_select_gep_undef(i1 %cond) { ; CHECK-LABEL: @test_sroa_select_gep_undef( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* poison +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* undef ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SELECT_SROA_SEL]], align 4 ; CHECK-NEXT: ret i32 [[LOAD]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll index 8a6b1e98c968..b9d82e9f81df 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll @@ -128,7 +128,7 @@ define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_udiv( ; CHECK-NEXT: [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i32 1 @@ -143,7 +143,7 @@ define <2 x i64> 
@ins1_ins1_udiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_urem( ; CHECK-NEXT: [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i64 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index 0637b5005683..a400e8f42907 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -131,7 +131,7 @@ define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { define <2 x i64> @shl_constant_op0(i64 %x) { ; CHECK-LABEL: @shl_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -142,7 +142,7 @@ define <2 x i64> @shl_constant_op0(i64 %x) { define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -189,7 +189,7 @@ define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { define <2 x i64> @shl_constant_op1(i64 %x) { ; CHECK-LABEL: @shl_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x 
i64> , i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -224,7 +224,7 @@ define <2 x i64> @shl_constant_op1_load(i64* %p) { define <2 x i64> @ashr_constant_op0(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -235,7 +235,7 @@ define <2 x i64> @ashr_constant_op0(i64 %x) { define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -246,7 +246,7 @@ define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @ashr_constant_op1(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -268,7 +268,7 @@ define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op0(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 
[[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -279,7 +279,7 @@ define <2 x i64> @lshr_constant_op0(i64 %x) { define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -290,7 +290,7 @@ define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op1(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -312,7 +312,7 @@ define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op0(i64 %x) { ; CHECK-LABEL: @urem_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -323,7 +323,7 @@ define <2 x i64> @urem_constant_op0(i64 %x) { define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = 
insertelement <2 x i64> undef, i64 %x, i32 0 @@ -334,7 +334,7 @@ define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op1(i64 %x) { ; CHECK-LABEL: @urem_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -356,7 +356,7 @@ define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op0(i64 %x) { ; CHECK-LABEL: @srem_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -367,7 +367,7 @@ define <2 x i64> @srem_constant_op0(i64 %x) { define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -378,7 +378,7 @@ define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op1(i64 %x) { ; CHECK-LABEL: @srem_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -400,7 +400,7 @@ define <2 x i64> 
@srem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op0(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -411,7 +411,7 @@ define <2 x i64> @udiv_constant_op0(i64 %x) { define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -422,7 +422,7 @@ define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op1(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -444,7 +444,7 @@ define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op0(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -455,7 +455,7 @@ define <2 x i64> @sdiv_constant_op0(i64 %x) { define <2 x i64> 
@sdiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -466,7 +466,7 @@ define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op1(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1( ; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 -; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll index 4fd33cc7ef28..abebf4d809af 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll @@ -53,7 +53,7 @@ define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) { ; CHECK-NEXT: [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]] ; CHECK-NEXT: [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]] ; CHECK-NEXT: [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]] -; CHECK-NEXT: [[S2:%.*]] = insertelement <2 x i64> poison, i64 [[S2_SCALAR]], i64 1 +; CHECK-NEXT: [[S2:%.*]] = insertelement <2 x i64> undef, i64 [[S2_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[S2]] ; %i0 = insertelement <2 x i64> undef, i64 %w, i64 1 @@ -128,7 +128,7 @@ define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_udiv( ; CHECK-NEXT: [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 
1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i32 1 @@ -143,7 +143,7 @@ define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) { define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) { ; CHECK-LABEL: @ins1_ins1_urem( ; CHECK-NEXT: [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 +; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> , i64 [[R_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[R]] ; %i0 = insertelement <2 x i64> , i64 %x, i64 1 diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp index 9eabc7c55638..96d3672647e8 100644 --- a/llvm/unittests/IR/ConstantsTest.cpp +++ b/llvm/unittests/IR/ConstantsTest.cpp @@ -27,7 +27,7 @@ TEST(ConstantsTest, Integer_i1) { Constant* Zero = ConstantInt::get(Int1, 0); Constant* NegOne = ConstantInt::get(Int1, static_cast(-1), true); EXPECT_EQ(NegOne, ConstantInt::getSigned(Int1, -1)); - Constant* Poison = PoisonValue::get(Int1); + Constant* Undef = UndefValue::get(Int1); // Input: @b = constant i1 add(i1 1 , i1 1) // Output: @b = constant i1 false @@ -53,21 +53,21 @@ TEST(ConstantsTest, Integer_i1) { // @g = constant i1 false EXPECT_EQ(Zero, ConstantExpr::getSub(One, One)); - // @h = constant i1 shl(i1 1 , i1 1) ; poison - // @h = constant i1 poison - EXPECT_EQ(Poison, ConstantExpr::getShl(One, One)); + // @h = constant i1 shl(i1 1 , i1 1) ; undefined + // @h = constant i1 undef + EXPECT_EQ(Undef, ConstantExpr::getShl(One, One)); // @i = constant i1 shl(i1 1 , i1 0) // @i = constant i1 true EXPECT_EQ(One, ConstantExpr::getShl(One, Zero)); - // @j = constant i1 lshr(i1 1, i1 1) ; poison - // @j = constant i1 poison - EXPECT_EQ(Poison, ConstantExpr::getLShr(One, One)); + // @j = constant i1 lshr(i1 1, i1 1) ; undefined + // @j = constant i1 undef + EXPECT_EQ(Undef, ConstantExpr::getLShr(One, One)); - // @m = constant i1 ashr(i1 
1, i1 1) ; poison - // @m = constant i1 poison - EXPECT_EQ(Poison, ConstantExpr::getAShr(One, One)); + // @m = constant i1 ashr(i1 1, i1 1) ; undefined + // @m = constant i1 undef + EXPECT_EQ(Undef, ConstantExpr::getAShr(One, One)); // @n = constant i1 mul(i1 -1, i1 1) // @n = constant i1 true @@ -218,6 +218,7 @@ TEST(ConstantsTest, AsInstructionsTest) { Constant *Elt = ConstantInt::get(Int16Ty, 2015); Constant *Poison16 = PoisonValue::get(Int16Ty); Constant *Undef64 = UndefValue::get(Int64Ty); + Constant *UndefV16 = UndefValue::get(P6->getType()); Constant *PoisonV16 = PoisonValue::get(P6->getType()); #define P0STR "ptrtoint (i32** @dummy to i32)" @@ -294,8 +295,8 @@ TEST(ConstantsTest, AsInstructionsTest) { EXPECT_EQ(Elt, ConstantExpr::getExtractElement( ConstantExpr::getInsertElement(P6, Elt, One), One)); - EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Two)); - EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Big)); + EXPECT_EQ(UndefV16, ConstantExpr::getInsertElement(P6, Elt, Two)); + EXPECT_EQ(UndefV16, ConstantExpr::getInsertElement(P6, Elt, Big)); EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Undef64)); } From 91f34dabb92d8446142b3c5777fa83e6bcbdfa7e Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 2 Feb 2021 18:41:49 -0800 Subject: [PATCH 030/244] workflows: Re-enable lldb test on Mac OS X --- .github/workflows/lldb-tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml index 93fddc2de8c6..68aec6036995 100644 --- a/.github/workflows/lldb-tests.yml +++ b/.github/workflows/lldb-tests.yml @@ -20,14 +20,16 @@ jobs: build_lldb: name: lldb build runs-on: ${{ matrix.os }} + # Workaround for build failure on Mac OS X: llvm.org/PR46190, https://github.com/actions/virtual-environments/issues/2274 + env: + CPLUS_INCLUDE_PATH: /usr/local/opt/llvm/include/c++/v1:/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/include
strategy: fail-fast: false matrix: os: - ubuntu-latest - windows-latest - # macOS build disabled due to: llvm.org/PR46190 - #- macOS-latest + - macOS-latest steps: - name: Setup Windows if: startsWith(matrix.os, 'windows') From 872608926129a61489d484e15cb9186882578c73 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 3 Feb 2021 03:09:24 +0000 Subject: [PATCH 031/244] workflows: Fix actions repository name for llvm tests --- .github/workflows/llvm-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 1cffc3ef4d97..1fcd67a10078 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -56,7 +56,7 @@ jobs: - name: Get LLVM version id: version - uses: tstellar/actions/get-llvm-version@get-version + uses: llvm/actions/get-llvm-version@main - name: Setup Variables id: vars From 2a57ea296a4787828b52799564d7ddf02ec1c4f3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 1 Feb 2021 13:05:19 +0000 Subject: [PATCH 032/244] workflows: Add job to check for ABI changes in libclang.so and libclang-cpp.so --- .github/workflows/libclang-abi-tests.yml | 132 +++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 .github/workflows/libclang-abi-tests.yml diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml new file mode 100644 index 000000000000..5681c7c8166e --- /dev/null +++ b/.github/workflows/libclang-abi-tests.yml @@ -0,0 +1,132 @@ +name: libclang ABI Tests + +on: + push: + branches: + - 'release/**' + paths: + - 'clang/**' + - '.github/workflows/libclang-abi-tests.yml' + pull_request: + paths: + - 'clang/**' + - '.github/workflows/libclang-abi-tests.yml' + +jobs: + abi-dump-setup: + runs-on: ubuntu-latest + outputs: + BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} + ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} + ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }} + 
BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} + LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }} + LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} + steps: + - name: Checkout source + uses: actions/checkout@v1 + with: + fetch-depth: 1 + + - name: Get LLVM version + id: version + uses: llvm/actions/get-llvm-version@main + + - name: Setup Variables + id: vars + run: | + if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then + echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + echo ::set-output name=ABI_HEADERS::clang-c + echo ::set-output name=ABI_LIBS::libclang.so + else + echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + echo ::set-output name=ABI_HEADERS::. + echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so + fi + + abi-dump: + needs: abi-dump-setup + runs-on: ubuntu-latest + strategy: + matrix: + name: + - build-baseline + - build-latest + include: + - name: build-baseline + llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} + ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 + repo: llvm/llvm-project + - name: build-latest + llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} + ref: ${{ github.sha }} + repo: ${{ github.repository }} + steps: + - name: Install Ninja + uses: llvm/actions/install-ninja@main + - name: Install abi-compliance-checker + run: | + sudo apt-get install abi-dumper autoconf pkg-config + - name: Install universal-ctags + run: | + git clone https://github.com/universal-ctags/ctags.git + cd ctags + ./autogen.sh + ./configure + sudo make install + - name: Download source code + uses: llvm/actions/get-llvm-project-src@main + with: + ref: ${{ matrix.ref }} 
+ repo: ${{ matrix.repo }} + - name: Configure + run: | + mkdir install + cmake -B build -S llvm -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm + - name: Build + run: ninja -C build/ ${{ needs.abi-dump-setup.outputs.ABI_LIBS }} install-clang-headers + - name: Dump ABI + run: | + parallel abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o {}-${{ matrix.ref }}.abi ./build/lib/{} ::: ${{ needs.abi-dump-setup.outputs.ABI_LIBS }} + for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do + # Remove symbol versioning from dumps, so we can compare across major versions. + sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' $lib-${{ matrix.ref }}.abi + tar -czf $lib-${{ matrix.ref }}.abi.tar.gz $lib-${{ matrix.ref }}.abi + done + - name: Upload ABI file + uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.name }} + path: "*${{ matrix.ref }}.abi.tar.gz" + + abi-compare: + runs-on: ubuntu-latest + needs: + - abi-dump-setup + - abi-dump + steps: + - name: Download baseline + uses: actions/download-artifact@v1 + with: + name: build-baseline + - name: Download latest + uses: actions/download-artifact@v1 + with: + name: build-latest + + - name: Install abi-compliance-checker + run: sudo apt-get install abi-compliance-checker + - name: Compare ABI + run: | + for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do + abi-compliance-checker -lib $lib -old build-baseline/$lib*.abi.tar.gz -new build-latest/$lib*.abi.tar.gz + done + - name: Upload ABI Comparison + if: always() + uses: actions/upload-artifact@v2 + with: + name: compat-report-${{ github.sha }} + path: compat_reports/ + From c1899cd5102dbdacd006fdb33db075319ccc933f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: 
Thu, 28 Jan 2021 11:21:21 +0000 Subject: [PATCH 033/244] [X86][AVX] Add PR48908 shuffle test case (cherry picked from commit da8845fc3d3bb0b0e133f020931440511fa72723) --- .../X86/vector-shuffle-combining-avx.ll | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 979c365acfd7..3da83b25d363 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -436,6 +436,157 @@ entry: unreachable } +define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double>* noalias %out0, <4 x double>* noalias %out1, <4 x double>* noalias %out2) { +; X86-AVX1-LABEL: PR48908: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] +; X86-AVX1-NEXT: vmovapd %ymm4, (%edx) +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX1-NEXT: vmovapd %ymm3, (%ecx) +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X86-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X86-AVX1-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX1-NEXT: vzeroupper +; X86-AVX1-NEXT: retl +; +; 
X86-AVX2-LABEL: PR48908: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1] +; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1] +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3] +; X86-AVX2-NEXT: vmovapd %ymm3, (%edx) +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3] +; X86-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0] +; X86-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX2-NEXT: vmovapd %ymm3, (%ecx) +; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X86-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X86-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X86-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X86-AVX2-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX2-NEXT: vzeroupper +; X86-AVX2-NEXT: retl +; +; X86-AVX512-LABEL: PR48908: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; X86-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; X86-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2] +; X86-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3] +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,0,3,0,8,0,1,0] +; X86-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3 +; X86-AVX512-NEXT: vmovapd %ymm3, 
(%edx) +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,0,3,0,10,0,1,0] +; X86-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4 +; X86-AVX512-NEXT: vmovapd %ymm4, (%ecx) +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u> +; X86-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3 +; X86-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0] +; X86-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0 +; X86-AVX512-NEXT: vmovapd %ymm0, (%eax) +; X86-AVX512-NEXT: vzeroupper +; X86-AVX512-NEXT: retl +; +; X64-AVX1-LABEL: PR48908: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] +; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] +; X64-AVX1-NEXT: vmovapd %ymm4, (%rdi) +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX1-NEXT: vmovapd %ymm3, (%rsi) +; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X64-AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X64-AVX1-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX1-NEXT: vzeroupper +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: PR48908: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX2-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1] +; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1] +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3] +; 
X64-AVX2-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3] +; X64-AVX2-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0] +; X64-AVX2-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX2-NEXT: vmovapd %ymm3, (%rsi) +; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; X64-AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] +; X64-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; X64-AVX2-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3] +; X64-AVX2-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: retq +; +; X64-AVX512-LABEL: PR48908: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; X64-AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; X64-AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; X64-AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2] +; X64-AVX512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX512-NEXT: vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3] +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm5 = [0,3,8,1] +; X64-AVX512-NEXT: vpermt2pd %zmm2, %zmm5, %zmm3 +; X64-AVX512-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1] +; X64-AVX512-NEXT: vpermt2pd %zmm0, %zmm3, %zmm4 +; X64-AVX512-NEXT: vmovapd %ymm4, (%rsi) +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = <3,11,u,u> +; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3 +; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3] +; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0 +; X64-AVX512-NEXT: vmovapd %ymm0, (%rdx) +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq + %t0 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> + %t1 = shufflevector <4 x double> %v1, <4 x double> %v2, <4 x i32> + %r0 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> + store <4 x double> %r0, <4 x double>* %out0, 
align 32 + %r1 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> + store <4 x double> %r1, <4 x double>* %out1, align 32 + %t2 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> + %r2 = shufflevector <4 x double> %t2, <4 x double> %v2, <4 x i32> + store <4 x double> %r2, <4 x double>* %out2, align 32 + ret void +} + define <4 x i64> @concat_self_v4i64(<2 x i64> %x) { ; AVX1-LABEL: concat_self_v4i64: ; AVX1: # %bb.0: From 52a70a07e93c322ad137bce1a1ff2f1c9fdf6050 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 28 Jan 2021 12:11:31 +0000 Subject: [PATCH 034/244] [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps - don't merge VPERMILPD ops with different low/high masks. Unlike VPERMILPS, VPERMILPD can have non-repeating masks in each 128-bit subvector, we weren't accounting for this when folding vperm2f128(vpermilpd(x,c),vpermilpd(y,c)) -> vpermilpd(vperm2f128(x,y),c). I'm intending to add support for this but wanted to get a minimal fix in first for merging into 12.xx. Fixes PR48908 (cherry picked from commit 6663330bc8c84a75ea092272297b557bfc310380) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ++++- .../X86/vector-shuffle-combining-avx.ll | 40 ++++++++++--------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0dd20235aa3c..6b816c710f98 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36916,11 +36916,18 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V, Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res)); return DAG.getBitcast(VT, Res); } + case X86ISD::VPERMILPI: + // TODO: Handle v4f64 permutes with different low/high lane masks. 
+ if (SrcVT0 == MVT::v4f64) { + uint64_t Mask = Src0.getConstantOperandVal(1); + if ((Mask & 0x3) != ((Mask >> 2) & 0x3)) + break; + } + LLVM_FALLTHROUGH; case X86ISD::VSHLI: case X86ISD::VSRLI: case X86ISD::VSRAI: case X86ISD::PSHUFD: - case X86ISD::VPERMILPI: if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) { SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0)); SDValue RHS = diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 3da83b25d363..1a1153d0e886 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -442,16 +442,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] -; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 -; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] -; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X86-AVX1-NEXT: vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm5 +; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3] ; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] -; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] -; X86-AVX1-NEXT: vmovapd %ymm4, (%edx) -; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] -; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] -; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3] +; 
X86-AVX1-NEXT: vmovapd %ymm3, (%edx) +; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm4[2,3,0,1] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm4[0,1],ymm0[2],ymm4[3] +; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3] ; X86-AVX1-NEXT: vmovapd %ymm3, (%ecx) ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3] @@ -513,16 +515,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x ; ; X64-AVX1-LABEL: PR48908: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1] -; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm4 -; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3] -; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1] +; X64-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 +; X64-AVX1-NEXT: vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] +; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm5 +; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3] ; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] -; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2] -; X64-AVX1-NEXT: vmovapd %ymm4, (%rdi) -; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1] -; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3] -; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3] +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3] +; X64-AVX1-NEXT: vmovapd %ymm3, (%rdi) +; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm4[2,3,0,1] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm4[0,1],ymm0[2],ymm4[3] +; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3] ; X64-AVX1-NEXT: vmovapd %ymm3, (%rsi) ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = 
ymm2[2,3],ymm1[2,3] From 0564dd904bf7ef7758cb904ed8f7f2a1f915ef8d Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Fri, 29 Jan 2021 08:44:56 +0100 Subject: [PATCH 035/244] [OpenMP] Fix python3 compatibility in openmp's lit.cfg Differential Revision: https://reviews.llvm.org/D95669 (cherry picked from commit c3c02d0d5a313272f6d35926bdf678fc6b884c02) --- openmp/runtime/test/lit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg index 0d4a6107ff2b..c4e5fe1ea9e0 100644 --- a/openmp/runtime/test/lit.cfg +++ b/openmp/runtime/test/lit.cfg @@ -76,7 +76,7 @@ if config.operating_system == 'Darwin': cmd = subprocess.Popen(['xcrun', '--show-sdk-path'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = cmd.communicate() - out = out.strip() + out = out.strip().decode() res = cmd.wait() if res == 0 and out: config.test_flags += " -isysroot " + out From e3658cefc5bc3538d05fc8ef058d83bcd24b785a Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Sat, 30 Jan 2021 12:34:06 +0900 Subject: [PATCH 036/244] [VE] Change integer constants 32-bit friendly Correct integer constants like `1UL << 63` to `UINT64_C(1) << 63` in order to make them work on 32-bit machines. Tested on both i386 and x86_64 machines. 
Reviewed By: mgorny Differential Revision: https://reviews.llvm.org/D95724 (cherry picked from commit 4648098f97fa2a7c08c04632c70cf29293528812) --- llvm/lib/Target/VE/VE.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h index a404f7ced70a..8c1fa840f19c 100644 --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -334,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) { return true; } // (m)1 patterns - return (Val & (1UL << 63)) && isShiftedMask_64(Val); + return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val); } inline static bool isMImm32Val(uint32_t Val) { @@ -347,14 +347,14 @@ inline static bool isMImm32Val(uint32_t Val) { return true; } // (m)1 patterns - return (Val & (1 << 31)) && isShiftedMask_32(Val); + return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val); } /// val2MImm - Convert an integer immediate value to target MImm immediate. inline static uint64_t val2MImm(uint64_t Val) { if (Val == 0) return 0; // (0)1 - if (Val & (1UL << 63)) + if (Val & (UINT64_C(1) << 63)) return countLeadingOnes(Val); // (m)1 return countLeadingZeros(Val) | 0x40; // (m)0 } @@ -364,8 +364,8 @@ inline static uint64_t mimm2Val(uint64_t Val) { if (Val == 0) return 0; // (0)1 if ((Val & 0x40) == 0) - return (uint64_t)((1L << 63) >> (Val & 0x3f)); // (m)1 - return ((uint64_t)(-1L) >> (Val & 0x3f)); // (m)0 + return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1 + return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0 } inline unsigned M0(unsigned Val) { return Val + 64; } From b351efcae08a59c0cafa123a92b24c5f2300202b Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Wed, 27 Jan 2021 23:08:39 -0600 Subject: [PATCH 037/244] [PowerPC] Do not emit XXSPLTI32DX for sub 64-bit constants If the APInt returned by BuildVectorSDNode::isConstantSplat() is narrower than 64 bits, the result produced by XXSPLTI32DX is incorrect. 
The result returned by the function appears to be incorrect and we'll investigate/fix it in a follow-up commit. However, since this causes miscompiles, we must temporarily disable emitting this instruction for such values. (cherry picked from commit 54e570d94af995ff58287a8288389641910a8239) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 ++- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll | 22 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9215c17cb94b..663ee15db11e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8613,7 +8613,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); return DAG.getBitcast(Op.getValueType(), SplatNode); - } else { // We may lose precision, so we have to use XXSPLTI32DX. + } else if (APSplatBits.getBitWidth() == 64) { + // We may lose precision, so we have to use XXSPLTI32DX. uint32_t Hi = (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll index 420a96dc1495..081cae729acf 100644 --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -100,3 +100,25 @@ define dso_local <8 x i16> @test_xxsplti32dx_9() { entry: ret <8 x i16> } + +define dso_local <16 x i8> @test_xxsplti32dx_10() { +; CHECK-LABEL: test_xxsplti32dx_10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NEXT: xxsplti32dx vs34, 0, 1207959552 +; CHECK-NEXT: blr +entry: + ret <16 x i8> +} + +; FIXME: It appears that there is something wrong with the computation +; of the 64-bit constant to splat so we cannot emit xxsplti32dx for +; this test case for now. 
+define dso_local <16 x i8> @constSplatBug() { +; CHECK-LABEL: constSplatBug: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv vs34, .LCPI10_0@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + ret <16 x i8> +} From dfb763363bc560769605e37e96c1d13cb236223d Mon Sep 17 00:00:00 2001 From: Albion Fung Date: Thu, 28 Jan 2021 15:17:18 -0500 Subject: [PATCH 038/244] [PowerPC][Power10] Fix XXSPLTI32DX not correctly exploiting specific cases Some cases may be transformed into 32 bit splats before hitting the boolean statement, which may cause incorrect behaviour and provide XXSPLTI32DX with the incorrect values of splat. The condition was reversed so that the shortcut prevents this problem. Differential Revision: https://reviews.llvm.org/D95634 (cherry picked from commit 2e470e03b49f1d79ebc315ca9d62a690a633c0cd) --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 11 +++++++---- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll | 16 ++-------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 663ee15db11e..929a72ac687e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8604,16 +8604,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If it is a splat of a double, check if we can shrink it to a 32 bit // non-denormal float which when converted back to double gives us the same - // double. This is to exploit the XXSPLTIDP instruction.+ // If we lose precision, we use XXSPLTI32DX. + // double. This is to exploit the XXSPLTIDP instruction. + // If we lose precision, we use XXSPLTI32DX. if (BVNIsConstantSplat && (SplatBitSize == 64) && Subtarget.hasPrefixInstrs()) { - if (convertToNonDenormSingle(APSplatBits) && - (Op->getValueType(0) == MVT::v2f64)) { + // Check the type first to short-circuit so we don't modify APSplatBits if + // this block isn't executed. 
+ if ((Op->getValueType(0) == MVT::v2f64) && + convertToNonDenormSingle(APSplatBits)) { SDValue SplatNode = DAG.getNode( PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); return DAG.getBitcast(Op.getValueType(), SplatNode); - } else if (APSplatBits.getBitWidth() == 64) { + } else { // We may lose precision, so we have to use XXSPLTI32DX. uint32_t Hi = diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll index 081cae729acf..ce4c2da24b0d 100644 --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -101,23 +101,11 @@ entry: ret <8 x i16> } -define dso_local <16 x i8> @test_xxsplti32dx_10() { -; CHECK-LABEL: test_xxsplti32dx_10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlxor vs34, vs34, vs34 -; CHECK-NEXT: xxsplti32dx vs34, 0, 1207959552 -; CHECK-NEXT: blr -entry: - ret <16 x i8> -} - -; FIXME: It appears that there is something wrong with the computation -; of the 64-bit constant to splat so we cannot emit xxsplti32dx for -; this test case for now. define dso_local <16 x i8> @constSplatBug() { ; CHECK-LABEL: constSplatBug: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI10_0@PCREL(0), 1 +; CHECK-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NEXT: xxsplti32dx vs34, 0, 1191182336 ; CHECK-NEXT: blr entry: ret <16 x i8> From 237b39a02f38b4903f39fef362d0f5e98e1de194 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Fri, 29 Jan 2021 21:59:49 +0800 Subject: [PATCH 039/244] [RISCV] Update the version number to v0.10 for vector. v0.10 is tagged in V specification. Update the version to v0.10. 
Differential Revision: https://reviews.llvm.org/D95680 (cherry picked from commit 282aca10aeb03bdaef0a8d4f3faa4c2ff236e527) --- clang/lib/Basic/Targets/RISCV.cpp | 6 +++--- clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 2 +- clang/test/Driver/riscv-arch.c | 6 +++--- .../test/Preprocessor/riscv-target-features.c | 18 +++++++++--------- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 6 +++--- .../RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 6 +++--- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 2 +- llvm/test/CodeGen/RISCV/attributes.ll | 8 ++++---- llvm/test/MC/RISCV/attribute-arch.s | 8 ++++---- 9 files changed, 31 insertions(+), 31 deletions(-) diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 0bf02e605740..786201ea340d 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -150,7 +150,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, } if (HasV) { - Builder.defineMacro("__riscv_v", "1000000"); + Builder.defineMacro("__riscv_v", "10000"); Builder.defineMacro("__riscv_vector"); } @@ -191,10 +191,10 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__riscv_zfh", "1000"); if (HasZvamo) - Builder.defineMacro("__riscv_zvamo", "1000000"); + Builder.defineMacro("__riscv_zvamo", "10000"); if (HasZvlsseg) - Builder.defineMacro("__riscv_zvlsseg", "1000000"); + Builder.defineMacro("__riscv_zvlsseg", "10000"); } /// Return true if has this feature, need to sync with handleTargetFeatures. 
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index ffae47e5672e..c7f2a3ea5e02 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -63,7 +63,7 @@ isExperimentalExtension(StringRef Ext) { Ext == "zbr" || Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc") return RISCVExtensionVersion{"0", "93"}; if (Ext == "v" || Ext == "zvamo" || Ext == "zvlsseg") - return RISCVExtensionVersion{"1", "0"}; + return RISCVExtensionVersion{"0", "10"}; if (Ext == "zfh") return RISCVExtensionVersion{"0", "1"}; return None; diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 3762a4aef1b3..cf148ca885d0 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -384,7 +384,7 @@ // RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1' // RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v" @@ -412,7 +412,7 @@ // RV32-EXPERIMENTAL-ZVAMO-BADVERS: error: invalid arch name 'rv32izvamo0p1' // RV32-EXPERIMENTAL-ZVAMO-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVAMO-GOODVERS %s // RV32-EXPERIMENTAL-ZVAMO-GOODVERS: "-target-feature" "+experimental-zvamo" @@ -431,6 +431,6 @@ // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: error: invalid 
arch name 'rv32izvlsseg0p1' // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS %s // RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS: "-target-feature" "+experimental-zvlsseg" diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 006395505246..88826bbd60b8 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -110,23 +110,23 @@ // CHECK-DOUBLE-NOT: __riscv_float_abi_single // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv32iv1p0 -x c -E -dM %s \ +// RUN: -march=rv32iv0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv1p0 -x c -E -dM %s \ +// RUN: -march=rv64iv0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo1p0 -x c -E -dM %s \ +// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo1p0 -x c -E -dM %s \ +// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg1p0 -x 
c -E -dM %s \ +// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg1p0 -x c -E -dM %s \ +// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg0p10 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// CHECK-V-EXT: __riscv_v 1000000 +// CHECK-V-EXT: __riscv_v 10000 // CHECK-V-EXT: __riscv_vector 1 -// CHECK-V-EXT: __riscv_zvamo 1000000 -// CHECK-V-EXT: __riscv_zvlsseg 1000000 +// CHECK-V-EXT: __riscv_zvamo 10000 +// CHECK-V-EXT: __riscv_zvlsseg 10000 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izba0p93 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZBA-EXT %s diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index e7e590153605..dcf7525d7458 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2126,7 +2126,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() { if (getFeatureBits(RISCV::FeatureStdExtB)) formalArchStr = (Twine(formalArchStr) + "_b0p93").str(); if (getFeatureBits(RISCV::FeatureStdExtV)) - formalArchStr = (Twine(formalArchStr) + "_v1p0").str(); + formalArchStr = (Twine(formalArchStr) + "_v0p10").str(); if (getFeatureBits(RISCV::FeatureExtZfh)) formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str(); if (getFeatureBits(RISCV::FeatureExtZba)) @@ -2152,9 +2152,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() { if (getFeatureBits(RISCV::FeatureExtZbt)) formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str(); if (getFeatureBits(RISCV::FeatureExtZvamo)) - formalArchStr = (Twine(formalArchStr) + "_zvamo1p0").str(); + formalArchStr = (Twine(formalArchStr) + 
"_zvamo0p10").str(); if (getFeatureBits(RISCV::FeatureStdExtZvlsseg)) - formalArchStr = (Twine(formalArchStr) + "_zvlsseg1p0").str(); + formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str(); getTargetStreamer().emitTextAttribute(Tag, formalArchStr); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 72434a15bedb..13c4b84aa300 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -63,7 +63,7 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { if (STI.hasFeature(RISCV::FeatureStdExtB)) Arch += "_b0p93"; if (STI.hasFeature(RISCV::FeatureStdExtV)) - Arch += "_v1p0"; + Arch += "_v0p10"; if (STI.hasFeature(RISCV::FeatureExtZfh)) Arch += "_zfh0p1"; if (STI.hasFeature(RISCV::FeatureExtZba)) @@ -89,9 +89,9 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { if (STI.hasFeature(RISCV::FeatureExtZbt)) Arch += "_zbt0p93"; if (STI.hasFeature(RISCV::FeatureExtZvamo)) - Arch += "_zvamo1p0"; + Arch += "_zvamo0p10"; if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg)) - Arch += "_zvlsseg1p0"; + Arch += "_zvlsseg0p10"; emitTextAttribute(RISCVAttrs::ARCH, Arch); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 4f9e9cfbdb98..e02c9f8bcbe2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file describes the RISC-V instructions from the standard 'V' Vector -/// extension, version 0.9. +/// extension, version 0.10. /// This version is still experimental as the 'V' extension hasn't been /// ratified yet. 
/// diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index a0943d5d4293..c26a6d5b4a69 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -47,7 +47,7 @@ ; RV32D: .attribute 5, "rv32i2p0_f2p0_d2p0" ; RV32C: .attribute 5, "rv32i2p0_c2p0" ; RV32B: .attribute 5, "rv32i2p0_b0p93_zba0p93_zbb0p93_zbc0p93_zbe0p93_zbf0p93_zbm0p93_zbp0p93_zbr0p93_zbs0p93_zbt0p93" -; RV32V: .attribute 5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0" +; RV32V: .attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" ; RV32ZFH: .attribute 5, "rv32i2p0_f2p0_zfh0p1" ; RV32ZBA: .attribute 5, "rv32i2p0_zba0p93" ; RV32ZBB: .attribute 5, "rv32i2p0_zbb0p93" @@ -60,7 +60,7 @@ ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93" ; RV32ZBS: .attribute 5, "rv32i2p0_zbs0p93" ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93" -; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v1p0_zfh0p1_zbb0p93_zvamo1p0_zvlsseg1p0" +; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v0p10_zfh0p1_zbb0p93_zvamo0p10_zvlsseg0p10" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64A: .attribute 5, "rv64i2p0_a2p0" @@ -80,8 +80,8 @@ ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93" ; RV64ZBS: .attribute 5, "rv64i2p0_zbs0p93" ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93" -; RV64V: .attribute 5, "rv64i2p0_v1p0_zvamo1p0_zvlsseg1p0" -; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v1p0_zfh0p1_zbb0p93_zvamo1p0_zvlsseg1p0" +; RV64V: .attribute 5, "rv64i2p0_v0p10_zvamo0p10_zvlsseg0p10" +; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v0p10_zfh0p1_zbb0p93_zvamo0p10_zvlsseg0p10" define i32 @addi(i32 %a) { diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 66d7ad576382..51d0c6ace9e1 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -40,7 +40,7 @@ # CHECK: attribute 5, "rv32i2p0_b0p93_zba0p93_zbb0p93_zbc0p93_zbe0p93_zbf0p93_zbm0p93_zbp0p93_zbr0p93_zbs0p93_zbt0p93" .attribute arch, "rv32iv" -# CHECK: attribute 5, "rv32i2p0_v1p0" 
+# CHECK: attribute 5, "rv32i2p0_v0p10" .attribute arch, "rv32izba" # CHECK: attribute 5, "rv32i2p0_zba0p93" @@ -79,7 +79,7 @@ # CHECK: attribute 5, "rv32i2p0_f2p0_zfh0p1" .attribute arch, "rv32ivzvamo_zvlsseg" -# CHECK: attribute 5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0" +# CHECK: attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" -.attribute arch, "rv32iv_zvamo1p0_zvlsseg" -# CHECK: attribute 5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0" +.attribute arch, "rv32iv_zvamo0p10_zvlsseg" +# CHECK: attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" From c738c8aa9bf387cc960feca81bc5263e8c634e15 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Sat, 30 Jan 2021 07:54:41 +0800 Subject: [PATCH 040/244] [RISCV] Update the version number to v0.10 for vector. (cherry picked from commit 9847023660467a4469b5667bcf7a4c73a4780037) --- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 06e4d053d5d7..9fdfc2727d86 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file contains the required infrastructure to support code generation -/// for the standard 'V' (Vector) extension, version 0.9. This version is still +/// for the standard 'V' (Vector) extension, version 0.10. This version is still /// experimental as the 'V' extension hasn't been ratified yet. 
/// /// This file is included from RISCVInstrInfoV.td diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index aea3d0e17ccc..79a1e6ddc8a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -8,7 +8,7 @@ /// /// This file contains the required infrastructure and SDNode patterns to /// support code generation for the standard 'V' (Vector) extension, version -/// 0.9. This version is still experimental as the 'V' extension hasn't been +/// 0.10. This version is still experimental as the 'V' extension hasn't been /// ratified yet. /// /// This file is included from and depends upon RISCVInstrInfoVPseudos.td From c5904f5c9d32e563e2898e1242d5818e488fe2ee Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Sat, 16 Jan 2021 16:08:40 +0000 Subject: [PATCH 041/244] [LV] Fix crash when computing max VF too early D90687 introduced a crash: llvm::LoopVectorizationCostModel::computeMaxVF(llvm::ElementCount, unsigned int): Assertion `WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() && "No decisions should have been taken at this point"' failed. when compiling the following C code: typedef struct { char a; } b; b *c; int d, e; int f() { int g = 0; for (; d; d++) { e = 0; for (; e < c[d].a; e++) g++; } return g; } with: clang -Os -target hexagon -mhvx -fvectorize -mv67 testcase.c -S -o - This occurred since prior to D90687 computeFeasibleMaxVF would only be called in computeMaxVF when a scalar epilogue was allowed, but now it's always called. This causes the assert above since computeFeasibleMaxVF collects all viable VFs larger than the default MaxVF, and for each VF calculates the register usage which results in analysis being done the assert above guards against. This can occur in computeFeasibleMaxVF if TTI.shouldMaximizeVectorBandwidth and this target hook is implemented in the hexagon backend to always return true. 
Reported by @iajbar. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D94869 (cherry picked from commit 8cda227432f1c9ceb63b88802ed8136da97274f1) --- .../Transforms/Vectorize/LoopVectorize.cpp | 7 ++--- .../LoopVectorize/Hexagon/maximum-vf-crash.ll | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ea0d7673edf6..47635dbdda02 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5504,11 +5504,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return None; } - ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF); - switch (ScalarEpilogueStatus) { case CM_ScalarEpilogueAllowed: - return MaxVF; + return computeFeasibleMaxVF(TC, UserVF); case CM_ScalarEpilogueNotAllowedUsePredicate: LLVM_FALLTHROUGH; case CM_ScalarEpilogueNotNeededUsePredicate: @@ -5546,7 +5544,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a " "scalar epilogue instead.\n"); ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - return MaxVF; + return computeFeasibleMaxVF(TC, UserVF); } return None; } @@ -5563,6 +5561,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } + ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF); assert(!MaxVF.isScalable() && "Scalable vectors do not yet support tail folding"); assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) && diff --git a/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll new file mode 100644 index 000000000000..5f8c5d329edf --- 
/dev/null +++ b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll @@ -0,0 +1,29 @@ +; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -S < %s 2>&1 | FileCheck %s + +; Check that we don't crash. + +; CHECK-LABEL: @f +; CHECK: vector.body + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +; Function Attrs: optsize +define i32 @f() #0 { +entry: + br label %loop + +loop: + %g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ] + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %0 = load i8, i8* undef, align 1 + %g.1.lcssa = add i32 %g.016, undef + %iv.next = add nsw i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret i32 %g.1.lcssa +} + +attributes #0 = { optsize "target-features"="+hvx-length128b" } From b15f3fc5c71dc8a9db7e931e2922a065293e4a64 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Wed, 27 Jan 2021 16:47:21 +0000 Subject: [PATCH 042/244] [X86] Fix disassembly of x86-64 GDTLS code sequence For x86-64 the REX.w prefix takes precedence over any other size override (i.e. 0x66). Therefore, for x86-64 when REX.w is present set 'hasOpSize' to false to ensure that any size override is ignored. Fixes PR48901. 
Differential Revision: https://reviews.llvm.org/D95682 (cherry picked from commit 94fedd266125a5425aa33e11332bf414f0b6dc35) --- .../X86/Disassembler/X86Disassembler.cpp | 1 + llvm/test/MC/Disassembler/X86/x86-64.txt | 10 +++++++--- .../llvm-objdump/X86/disassemble-gdtls.s | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 05e482a6b66e..4e6d8e8e1a54 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) { insn->addressSize = (insn->hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; + insn->hasOpSize = false; } else { insn->registerSize = (insn->hasOpSize ? 2 : 4); insn->addressSize = (insn->hasAdSize ? 4 : 8); diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt index d91ef2500d99..5e56d4c796e6 100644 --- a/llvm/test/MC/Disassembler/X86/x86-64.txt +++ b/llvm/test/MC/Disassembler/X86/x86-64.txt @@ -329,8 +329,10 @@ # CHECK: callw 32767 0x66 0xe8 0xff 0x7f -# CHECK: callw 32767 -0x66 0x66 0x48 0xe8 0xff 0x7f +# TODO: Should display data16 prefixes. +# CHECK-NOT: data16 +# CHECK: callq 32767 +0x66 0x66 0x48 0xe8 0xff 0x7f 0x00 0x00 # CHECK: jmp -32769 0xe9 0xff 0x7f 0xff 0xff @@ -338,8 +340,10 @@ # CHECK: jmp 32767 0x66 0xe9 0xff 0x7f +# TODO: Should display data16 prefixes. 
+# CHECK-NOT: data16 # CHECK: jmp 32767 -0x66 0x66 0x48 0xe9 0xff 0x7f +0x66 0x66 0x48 0xe9 0xff 0x7f 0x00 0x00 # CHECK: jo -32769 0x0f 0x80 0xff 0x7f 0xff 0xff diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s new file mode 100644 index 000000000000..e913f5f6a345 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc %s -filetype=obj -triple=x86_64 | llvm-objdump -d - | FileCheck %s + +# CHECK: : +# TODO: Should display data16 prefixes. +# CHECK-NEXT: 0: 66 48 8d 3d 00 00 00 00 leaq (%rip), %rdi # 8 +# CHECK-NEXT: 8: 66 66 48 e8 00 00 00 00 callq 0x10 +# CHECK-EMPTY: + +PR48901: + data16 + leaq bar@TLSGD(%rip),%rdi + data16 + data16 + rex64 + callq __tls_get_addr@PLT + +.section .tdata,"awT",@progbits +bar: +.long 42 From e2d822c3bdf6388c6ef21f35745105aba064d16d Mon Sep 17 00:00:00 2001 From: Haowei Wu Date: Thu, 28 Jan 2021 14:13:20 -0800 Subject: [PATCH 043/244] [elfabi] Fix tests which failed on different timezones This patch fixes elfabi tests on machines using a GMT+X timezone settings. Differential Revision: https://reviews.llvm.org/D95641 (cherry picked from commit 771b35965457ebd5faaed8a1c3d2bcefffe721a3) --- llvm/test/tools/llvm-elfabi/preserve-dates-stub.test | 4 ++-- llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test index c399029e0337..9742a61aa281 100644 --- a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test +++ b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test @@ -1,9 +1,9 @@ ## Test writing unchanged content to ELF Stub file with --write-if-changed flag. 
# RUN: llvm-elfabi %s --output-target=elf64-little %t -# RUN: touch -m -t 197001010000 %t +# RUN: env TZ=GMT touch -m -t 197001010000 %t # RUN: llvm-elfabi %s --output-target=elf64-little %t --write-if-changed -# RUN: ls -l %t | FileCheck %s +# RUN: env TZ=GMT ls -l %t | FileCheck %s --- !tapi-tbe TbeVersion: 1.0 diff --git a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test index 89cad7733eee..3ec190067c73 100644 --- a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test +++ b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test @@ -1,8 +1,8 @@ ## Test writing unchanged content to TBE file with --write-if-changed flag. # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t -# RUN: touch -m -t 197001010000 %t +# RUN: env TZ=GMT touch -m -t 197001010000 %t # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t --write-if-changed -# RUN: ls -l %t | FileCheck %s +# RUN: env TZ=GMT ls -l %t | FileCheck %s # CHECK: {{[[:space:]]1970}} From 12b6579b79dc21e9e54e74520ece0d571a640d4b Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Wed, 27 Jan 2021 18:49:41 -0500 Subject: [PATCH 044/244] [OpenMP][Libomptarget] Fix conditional in CMake for remote plugin The remote offloading plugin's CMakeLists was trying to build if its flag was enabled even if it didn't find gRPC/protobuf. The conditional was wrong, it's fixed by this. 
Differential Revision: https://reviews.llvm.org/D95574 (cherry picked from commit 8a77056256d9970387595a5c729d894e3fe07131) --- openmp/libomptarget/plugins/remote/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/openmp/libomptarget/plugins/remote/CMakeLists.txt b/openmp/libomptarget/plugins/remote/CMakeLists.txt index 1baa1125f44c..989c74642c66 100644 --- a/openmp/libomptarget/plugins/remote/CMakeLists.txt +++ b/openmp/libomptarget/plugins/remote/CMakeLists.txt @@ -42,12 +42,13 @@ if (Protobuf_FOUND AND gRPC_FOUND AND PROTOC AND GRPC_CPP_PLUGIN) set(GRPC_INCLUDE_DIR ${directory} ) + + set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/) + set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/) + + add_subdirectory(src) + add_subdirectory(server) else() libomptarget_say("Not building remote offloading plugin: required libraries were not found.") endif() -set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/) -set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/) - -add_subdirectory(src) -add_subdirectory(server) From 4d0874c72a0a3f53eb3084a1ea3ee4456ab6e004 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 08:13:28 -0500 Subject: [PATCH 045/244] [OpenMP][NVPTX] Added the missing -O1 when building NVPTX bitcode libraries In the past `-O1` was used when building NVPTX bitcode libraries. After we switched to OpenMP, `-O1` was missing by mistake, leading to a huge performance regression. 
Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D95545 (cherry picked from commit 5a64794bbad4010778406dfee7748e6080258dbf) --- .../libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 23efbba29d66..eeda137ef120 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -126,14 +126,14 @@ set(cuda_src_files ) # Set flags for LLVM Bitcode compilation. -set(bc_flags -S -x c++ - -target nvptx64 - -Xclang -emit-llvm-bc - -Xclang -aux-triple -Xclang ${aux_triple} - -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device - -D__CUDACC__ - -I${devicertl_base_directory} - -I${devicertl_nvptx_directory}/src) +set(bc_flags -S -x c++ -O1 -std=c++14 + -target nvptx64 + -Xclang -emit-llvm-bc + -Xclang -aux-triple -Xclang ${aux_triple} + -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device + -D__CUDACC__ + -I${devicertl_base_directory} + -I${devicertl_nvptx_directory}/src) if(${LIBOMPTARGET_NVPTX_DEBUG}) list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=-1) From 5d926bb3c46848c704833e0f02884395609388a3 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 08:12:39 -0500 Subject: [PATCH 046/244] [OpenMP][deviceRTLs] Added `[[clang::loader_uninitialized]]` explicitly `[[clang::loader_uninitialized]]` is in macro `SHARED` but it doesn't work for array like `parallelLevel`, so the variable will be zero initialized. There is also a similar issue for `omptarget_nvptx_device_State` which is in global address space. Its c'tor is also generated, which was not in the past when building the `deviceRTLs` with CUDA. In this patch, we added the attribute to the two variables explicitly. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95550 (cherry picked from commit 19248d30e4ed5250fa84abbbd52fc7b835918a45) --- openmp/libomptarget/deviceRTLs/common/src/omp_data.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu index b91afd7476fe..4736d07108e0 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu @@ -25,7 +25,8 @@ DEVICE omptarget_device_environmentTy omptarget_device_environment; // global data holding OpenMP state information //////////////////////////////////////////////////////////////////////////////// -DEVICE +// OpenMP will try to call its ctor if we don't add the attribute explicitly +[[clang::loader_uninitialized]] DEVICE omptarget_nvptx_Queue omptarget_nvptx_device_State[MAX_SM]; @@ -33,7 +34,9 @@ DEVICE omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager; DEVICE uint32_t SHARED(usedMemIdx); DEVICE uint32_t SHARED(usedSlotIdx); -DEVICE uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; +// SHARED doesn't work with array so we add the attribute explicitly. +[[clang::loader_uninitialized]] DEVICE uint8_t + parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE]; #pragma omp allocate(parallelLevel) allocator(omp_pteam_mem_alloc) DEVICE uint16_t SHARED(threadLimit); DEVICE uint16_t SHARED(threadsInTeam); From 255f7398845a7cfb47aef53e40b68057ec56839e Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 29 Jan 2021 13:12:47 -0500 Subject: [PATCH 047/244] [OpenMP][NFC] Added release note for new `deviceRTLs` and hidden helper task Added release note for new `deviceRTLs` and hidden helper task for LLVM 12. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95584 (cherry picked from commit 7bc31018f71cac22b7060c49cefb6f3d0d2e2069) --- openmp/docs/ReleaseNotes.rst | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/openmp/docs/ReleaseNotes.rst b/openmp/docs/ReleaseNotes.rst index 7f40d3c81510..cb3464ad84f0 100644 --- a/openmp/docs/ReleaseNotes.rst +++ b/openmp/docs/ReleaseNotes.rst @@ -7,7 +7,7 @@ OpenMP 12.0.0 Release Notes These are in-progress notes for the upcoming LLVM 12.0.0 release. Release notes for previous releases can be found on `the Download Page `_. - + Introduction ============ @@ -44,3 +44,27 @@ Non-comprehensive list of changes in this release ``LIBOMPTARGET_INFO`` allows the user to request certain information from the ``libomptarget`` runtime using a 32-bit field. A full description of each environment variable is described :ref:`here `. + +- ``target nowait`` was supported via hidden helper task, which is a task not + bound to any parallel region. A hidden helper team with a number of threads is + created when the first hidden helper task is encountered. The number of threads + can be configured via the environment variable + ``LIBOMP_NUM_HIDDEN_HELPER_THREADS``. By default it is 8. If + ``LIBOMP_NUM_HIDDEN_HELPER_THREADS=0``, hidden helper task is disabled and + falls back to a regular OpenMP task. It can also be disabled by setting the + environment variable ``LIBOMP_USE_HIDDEN_HELPER_TASK=OFF``. + +- ``deviceRTLs`` for NVPTX platform is CUDA free now. It is generally OpenMP code. + Target dependent parts are implemented with Clang/LLVM/NVVM intrinsics. CUDA + SDK is also dropped as a dependence to build the device runtime, which means + device runtime can also be built on a CUDA free system. However, it is + disabled by default. Set the CMake variable + ``LIBOMPTARGET_BUILD_NVPTX_BCLIB=ON`` to enable the build of NVPTX device + runtime on a CUDA free system. 
``gcc-multilib`` and ``g++-multilib`` are + required. If CUDA is found, the device runtime will be built by default. + + - Static NVPTX device runtime library (``libomptarget-nvptx.a``) was dropped. + A bitcode library is required to build an OpenMP program. If the library is + not found in the default path or any of the paths defined by ``LIBRARY_PATH``, + an error will be raised. User can also specify the path to the bitcode device + library via ``--libomptarget-nvptx-bc-path=``. From 922e4149d16754b54ce225faa3e769d32937d7ad Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 1 Feb 2021 10:31:09 -0500 Subject: [PATCH 048/244] [OpenMP] Fix seg fault in libomptarget when using Info with multiple threads Summary: One option for the LIBOMPTARGET_INFO environment variable is to print the current status of the device's data mappings. These are a shared resource among threads so this needs to be protected when using multiple streams. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95786 (cherry picked from commit fda48539988d2a1bdb6395799151e9090312a20b) --- openmp/libomptarget/src/interface.cpp | 4 ++-- openmp/libomptarget/src/private.h | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index cf6d36960c75..01f3715d6bcc 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -58,7 +58,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) { case tgt_mandatory: if (!success) { if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE) - for (const auto &Device : PM->Devices) + for (auto &Device : PM->Devices) dumpTargetPointerMappings(loc, Device); else FAILURE_MESSAGE("Run with LIBOMPTARGET_DEBUG=%d to dump host-target " @@ -76,7 +76,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) { 1, "failure of target construct while offloading is mandatory"); } else { if 
(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE) - for (const auto &Device : PM->Devices) + for (auto &Device : PM->Devices) dumpTargetPointerMappings(loc, Device); } break; diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index fb6f681d3020..3b0e57dfe15e 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -99,7 +99,7 @@ int __kmpc_get_target_offload(void) __attribute__((weak)); //////////////////////////////////////////////////////////////////////////////// /// dump a table of all the host-target pointer pairs on failure static inline void dumpTargetPointerMappings(const ident_t *Loc, - const DeviceTy &Device) { + DeviceTy &Device) { if (Device.HostDataToTargetMap.empty()) return; @@ -109,6 +109,7 @@ static inline void dumpTargetPointerMappings(const ident_t *Loc, Kernel.getFilename(), Kernel.getLine(), Kernel.getColumn()); INFO(OMP_INFOTYPE_ALL, Device.DeviceID, "%-18s %-18s %s %s %s\n", "Host Ptr", "Target Ptr", "Size (B)", "RefCount", "Declaration"); + Device.DataMapMtx.lock(); for (const auto &HostTargetMap : Device.HostDataToTargetMap) { SourceInfo Info(HostTargetMap.HstPtrName); INFO(OMP_INFOTYPE_ALL, Device.DeviceID, @@ -118,6 +119,7 @@ static inline void dumpTargetPointerMappings(const ident_t *Loc, HostTargetMap.getRefCount(), Info.getName(), Info.getFilename(), Info.getLine(), Info.getColumn()); } + Device.DataMapMtx.unlock(); } //////////////////////////////////////////////////////////////////////////////// From 678c259d277135ef32861887a8ac8618deba5f24 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 3 Feb 2021 14:57:19 -0800 Subject: [PATCH 049/244] PR44325 (and duplicates): don't issue -Wzero-as-null-pointer-constant when rewriting 'a < b' as '(a <=> b) < 0'. It's pretty common for comparison category types to use a pointer or pointer-to-member type as their '0' parameter. 
(cherry picked from commit 1f06f41993b6363e6b2c4f22a13488a3e687f31b) --- clang/lib/Sema/Sema.cpp | 7 +++++++ .../SemaCXX/cxx2a-three-way-comparison.cpp | 20 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 55cb3aee6194..cb5a84a31235 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -537,6 +537,13 @@ void Sema::diagnoseZeroToNullptrConversion(CastKind Kind, const Expr* E) { if (E->IgnoreParenImpCasts()->getType()->isNullPtrType()) return; + // Don't diagnose the conversion from a 0 literal to a null pointer argument + // in a synthesized call to operator<=>. + if (!CodeSynthesisContexts.empty() && + CodeSynthesisContexts.back().Kind == + CodeSynthesisContext::RewritingOperatorAsSpaceship) + return; + // If it is a macro from system header, and if the macro name is not "NULL", // do not warn. SourceLocation MaybeMacroLoc = E->getBeginLoc(); diff --git a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp index 353360e052bb..b94225274fff 100644 --- a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp +++ b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++2a -verify %s +// RUN: %clang_cc1 -std=c++2a -verify %s -Wzero-as-null-pointer-constant // Keep this test before any declarations of operator<=>. namespace PR44786 { @@ -40,3 +40,21 @@ namespace PR47893 { int &f(...); int &r = f(A(), A()); } + +namespace PR44325 { + struct cmp_cat {}; + bool operator<(cmp_cat, void*); + bool operator>(cmp_cat, int cmp_cat::*); + + struct X {}; + cmp_cat operator<=>(X, X); + + bool b1 = X() < X(); // no warning + bool b2 = X() > X(); // no warning + + // FIXME: It's not clear whether warning here is useful, but we can't really + // tell that this is a comparison category in general. 
This is probably OK, + // as comparisons against zero are only really intended for use in the + // implicit rewrite rules, not for explicit use by programs. + bool c = cmp_cat() < 0; // expected-warning {{zero as null pointer constant}} +} From 2a917b70e770e2d25d96f91beebf2a3e52bb9e66 Mon Sep 17 00:00:00 2001 From: Stephen Kelly Date: Wed, 3 Feb 2021 23:04:12 +0000 Subject: [PATCH 050/244] Extend release notes for AST Matchers changes --- clang/docs/ReleaseNotes.rst | 38 +++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a34cd512ca59..9efd4c01f053 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -250,15 +250,41 @@ release of Clang. Users of the build system should adjust accordingly. AST Matchers ------------ -- The behavior of TK_IgnoreUnlessSpelledInSource with the traverse() matcher - has been changed to no longer match on template instantiations or on +- The ``mapAnyOf()`` matcher was added. This allows convenient matching of + different AST nodes which have a compatible matcher API. For example, + ``mapAnyOf(ifStmt, forStmt).with(hasCondition(integerLiteral()))`` + matches any ``IfStmt`` or ``ForStmt`` with a integer literal as the + condition. + +- The ``binaryOperation()`` matcher allows matching expressions which + appear like binary operators in the code, even if they are really + ``CXXOperatorCallExpr`` for example. It is based on the ``mapAnyOf()`` + matcher functionality. The matcher API for the latter node has been + extended with ``hasLHS()`` etc to facilitate the abstraction. + +- Matcher API for ``CXXRewrittenBinaryOperator`` has been added. In addition + to explicit matching with the ``cxxRewrittenBinaryOperator()`` matcher, the + ``binaryOperation()`` matches on nodes of this type. 
+ +- The behavior of ``TK_IgnoreUnlessSpelledInSource`` with the ``traverse()`` + matcher has been changed to no longer match on template instantiations or on implicit nodes which are not spelled in the source. -- The TK_IgnoreImplicitCastsAndParentheses traversal kind was removed. It - is recommended to use TK_IgnoreUnlessSpelledInSource instead. +- The ``TK_IgnoreImplicitCastsAndParentheses`` traversal kind was removed. It + is recommended to use ``TK_IgnoreUnlessSpelledInSource`` instead. -- The behavior of the forEach() matcher was changed to not internally ignore - implicit and parenthesis nodes. +- The behavior of the ``forEach()`` matcher was changed to not internally + ignore implicit and parenthesis nodes. This makes it consistent with + the ``has()`` matcher. Uses of ``forEach()`` relying on the old behavior + can now use the ``traverse()`` matcher or ``ignoringParenCasts()``. + +- Several AST Matchers have been changed to match based on the active + traversal mode. For example, ``argumentCountIs()`` matches the number of + arguments written in the source, ignoring default arguments represented + by ``CXXDefaultArgExpr`` nodes. + +- Improvements in AST Matchers allow more matching of template declarations, + independent of their template instantations. clang-format ------------ From f5602e0bf31ab590da19fa357980a753dbfd666e Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 07:24:19 -0500 Subject: [PATCH 051/244] [OpenMP] Disabled profiling in `libomp` by default to unblock link errors Link error occurred when time profiling in libomp is enabled by default because `libomp` is assumed to be a C library but the dependence on `libLLVMSupport` for profiling is a C++ library. Currently the issue blocks all OpenMP tests in Phabricator. This patch set a new CMake option `OPENMP_ENABLE_LIBOMP_PROFILING` to enable/disable the feature. By default it is disabled. Note that once time profiling is enabled for `libomp`, it becomes a C++ library. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95585 (cherry picked from commit c571b168349fdf22d1dc8b920bcffa3d5161f0a2) --- openmp/CMakeLists.txt | 6 ++++++ openmp/docs/design/Runtimes.rst | 5 ++++- openmp/runtime/CMakeLists.txt | 6 +++--- openmp/runtime/src/CMakeLists.txt | 12 +++++++++++- openmp/runtime/src/kmp_config.h.cmake | 4 ++-- openmp/runtime/src/kmp_runtime.cpp | 6 +++--- 6 files changed, 29 insertions(+), 10 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 67600bebdafb..4787d4b5a321 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -86,6 +86,12 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) +option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF) + +# Build host runtime library, after LIBOMPTARGET variables are set since they are needed +# to enable time profiling support in the OpenMP runtime. +add_subdirectory(runtime) + if (OPENMP_ENABLE_LIBOMPTARGET) # Check that the library can actually be built. if (APPLE OR WIN32) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 016b88ba324b..ad36e43eccdc 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -48,7 +48,10 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_ for time trace output. Using this library is enabled by default when building using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will -be saved to the filename specified by the environment variable. +be saved to the filename specified by the environment variable. For multi-threaded +applications, profiling in ``libomp`` is also needed. 
Set the CMake option +``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will +turn ``libomp`` into a C++ library. .. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 9fdd04f41646..8828ff8ef455 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -34,7 +34,6 @@ if(${OPENMP_STANDALONE_BUILD}) # Should assertions be enabled? They are on by default. set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") - set(LIBOMPTARGET_PROFILING_SUPPORT FALSE) else() # Part of LLVM build # Determine the native architecture from LLVM. string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH) @@ -66,10 +65,11 @@ else() # Part of LLVM build libomp_get_architecture(LIBOMP_ARCH) endif () set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS}) - # Time profiling support - set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING}) endif() +# Time profiling support +set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING}) + # FUJITSU A64FX is a special processor because its cache line size is 256. # We need to pass this information into kmp_config.h. if(LIBOMP_ARCH STREQUAL "aarch64") diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 2e927df84f5c..822f9ca2b825 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,6 +50,14 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() +# Building with time profiling support requires LLVM directory includes.
+if(LIBOMP_PROFILING_SUPPORT) + include_directories( + ${LLVM_MAIN_INCLUDE_DIR} + ${LLVM_INCLUDE_DIR} + ) +endif() + # Getting correct source files to build library set(LIBOMP_CXXFILES) set(LIBOMP_ASMFILES) @@ -135,7 +143,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS) libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS) # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled. -if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING)) +if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING)) add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES}) # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}) @@ -144,6 +152,8 @@ else() LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS} LINK_COMPONENTS Support ) + # libomp must be a C++ library such that it can link libLLVMSupport + set(LIBOMP_LINKER_LANGUAGE CXX) endif() set_target_properties(omp PROPERTIES diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index 3d682c690fc7..f6aee7197ee8 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -44,8 +44,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT -#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT +#cmakedefine01 LIBOMP_PROFILING_SUPPORT +#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 4a0634d59cff..a6e32bd008e1 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -32,7 +32,7 @@ #include "ompt-specific.h" #endif -#if 
OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT #include "llvm/Support/TimeProfiler.h" static char *ProfileTraceFile = nullptr; #endif @@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) { /* ------------------------------------------------------------------------ */ void *__kmp_launch_thread(kmp_info_t *this_thr) { -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE"); // TODO: add a configuration option for time granularity if (ProfileTraceFile) @@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); KMP_MB(); -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT llvm::timeTraceProfilerFinishThread(); #endif return this_thr; From 7d096f9bb350429628c6befce8f94dba4bbc6db9 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Wed, 27 Jan 2021 16:04:11 -0800 Subject: [PATCH 052/244] [CSSPGO] Support of CS profiles in extended binary format. This change brings up support of context-sensitive profiles in the format of extended binary. Existing sample profile reader/writer/merger code is being tweaked to reflect the fact of bracketed input contexts, like (`[...]`). The paired brackets are also needed in extbinary profiles because we don't yet have an otherwise good way to tell calling contexts apart from regular function names since the context delimiter `@` can somehow serve as a part of the C++ mangled names. 
Reviewed By: wmi, wenlei Differential Revision: https://reviews.llvm.org/D95547 (cherry picked from commit 7e99bddfeaab2713a8bb6ca538da25b66e6efc59) --- llvm/include/llvm/ProfileData/SampleProf.h | 19 ++-- .../llvm/ProfileData/SampleProfReader.h | 4 + llvm/lib/ProfileData/SampleProfReader.cpp | 86 ++++++++++--------- llvm/lib/ProfileData/SampleProfWriter.cpp | 4 +- .../Transforms/IPO/SampleContextTracker.cpp | 2 +- .../SampleProfile/profile-context-tracker.ll | 4 + .../llvm-profdata/Inputs/cs-sample.proftext | 36 ++++++++ .../llvm-profdata/cs-sample-profile.test | 4 + llvm/tools/llvm-profdata/llvm-profdata.cpp | 2 +- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 2 +- 10 files changed, 113 insertions(+), 50 deletions(-) create mode 100644 llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext create mode 100644 llvm/test/tools/llvm-profdata/cs-sample-profile.test diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index c45ace9e68c1..346bc4c81d86 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -439,9 +439,11 @@ class SampleContext { void clearState(ContextStateMask S) { State &= (uint32_t)~S; } bool hasContext() const { return State != UnknownContext; } bool isBaseContext() const { return CallingContext.empty(); } - StringRef getName() const { return Name; } + StringRef getNameWithoutContext() const { return Name; } StringRef getCallingContext() const { return CallingContext; } - StringRef getNameWithContext() const { return FullContext; } + StringRef getNameWithContext(bool WithBracket = false) const { + return WithBracket ? 
InputContext : FullContext; + } private: // Give a context string, decode and populate internal states like @@ -449,6 +451,7 @@ class SampleContext { // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` void setContext(StringRef ContextStr, ContextStateMask CState) { assert(!ContextStr.empty()); + InputContext = ContextStr; // Note that `[]` wrapped input indicates a full context string, otherwise // it's treated as context-less function name only. bool HasContext = ContextStr.startswith("["); @@ -480,6 +483,9 @@ class SampleContext { } } + // Input context string including bracketed calling context and leaf function + // name + StringRef InputContext; // Full context string including calling context and leaf function name StringRef FullContext; // Function name for the associated sample profile @@ -676,7 +682,8 @@ class FunctionSamples { Name = Other.getName(); if (!GUIDToFuncNameMap) GUIDToFuncNameMap = Other.GUIDToFuncNameMap; - + if (Context.getNameWithContext(true).empty()) + Context = Other.getContext(); if (FunctionHash == 0) { // Set the function hash code for the target profile. FunctionHash = Other.getFunctionHash(); @@ -743,8 +750,10 @@ class FunctionSamples { StringRef getName() const { return Name; } /// Return function name with context. - StringRef getNameWithContext() const { - return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name; + StringRef getNameWithContext(bool WithBracket = false) const { + return FunctionSamples::ProfileIsCS + ? Context.getNameWithContext(WithBracket) + : Name; } /// Return the original function name. diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 3f52a2f6163b..999e75eddffa 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -488,8 +488,12 @@ class SampleProfileReader { /// \brief Whether samples are collected based on pseudo probes. 
bool ProfileIsProbeBased = false; + /// Whether function profiles are context-sensitive. bool ProfileIsCS = false; + /// Number of context-sensitive profiles. + uint32_t CSProfileCount = 0; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; }; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index c42931174bc0..c9f41687c356 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -222,8 +222,6 @@ std::error_code SampleProfileReaderText::readImpl() { sampleprof_error Result = sampleprof_error::success; InlineCallStack InlineStack; - int CSProfileCount = 0; - int RegularProfileCount = 0; uint32_t ProbeProfileCount = 0; // SeenMetadata tracks whether we have processed metadata for the current @@ -257,11 +255,9 @@ std::error_code SampleProfileReaderText::readImpl() { SampleContext FContext(FName); if (FContext.hasContext()) ++CSProfileCount; - else - ++RegularProfileCount; Profiles[FContext] = FunctionSamples(); FunctionSamples &FProfile = Profiles[FContext]; - FProfile.setName(FContext.getName()); + FProfile.setName(FContext.getNameWithoutContext()); FProfile.setContext(FContext); MergeResult(Result, FProfile.addTotalSamples(NumSamples)); MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); @@ -324,13 +320,14 @@ std::error_code SampleProfileReaderText::readImpl() { } } - assert((RegularProfileCount == 0 || CSProfileCount == 0) && + assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && "Cannot have both context-sensitive and regular profile"); ProfileIsCS = (CSProfileCount > 0); assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) && "Cannot have both probe-based profiles and regular profiles"); ProfileIsProbeBased = (ProbeProfileCount > 0); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; + FunctionSamples::ProfileIsCS = ProfileIsCS; if (Result == sampleprof_error::success) computeSummary(); @@ 
-546,12 +543,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { if (std::error_code EC = FName.getError()) return EC; - Profiles[*FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[*FName]; - FProfile.setName(*FName); - + SampleContext FContext(*FName); + Profiles[FContext] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FContext]; + FProfile.setName(FContext.getNameWithoutContext()); + FProfile.setContext(FContext); FProfile.addHeadSamples(*NumHeadSamples); + if (FContext.hasContext()) + CSProfileCount++; + if (std::error_code EC = readProfile(FProfile)) return EC; return sampleprof_error::success; @@ -654,40 +655,44 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { return EC; } assert(Data == End && "More data is read than expected"); - return sampleprof_error::success; - } - - if (Remapper) { - for (auto Name : FuncsToUse) { - Remapper->insert(Name); + } else { + if (Remapper) { + for (auto Name : FuncsToUse) { + Remapper->insert(Name); + } } - } - if (useMD5()) { - for (auto Name : FuncsToUse) { - auto GUID = std::to_string(MD5Hash(Name)); - auto iter = FuncOffsetTable.find(StringRef(GUID)); - if (iter == FuncOffsetTable.end()) - continue; - const uint8_t *FuncProfileAddr = Start + iter->second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } else { - for (auto NameOffset : FuncOffsetTable) { - auto FuncName = NameOffset.first; - if (!FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName))) - continue; - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - assert(FuncProfileAddr < End && "out of LBRProfile section"); - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; + if (useMD5()) { + for (auto Name : FuncsToUse) { + auto GUID = std::to_string(MD5Hash(Name)); + auto iter = FuncOffsetTable.find(StringRef(GUID)); + if (iter == 
FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + } else { + for (auto NameOffset : FuncOffsetTable) { + SampleContext FContext(NameOffset.first); + auto FuncName = FContext.getNameWithoutContext(); + if (!FuncsToUse.count(FuncName) && + (!Remapper || !Remapper->exist(FuncName))) + continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + assert(FuncProfileAddr < End && "out of LBRProfile section"); + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } } + Data = End; } - Data = End; + assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && + "Cannot have both context-sensitive and regular profile"); + ProfileIsCS = (CSProfileCount > 0); + FunctionSamples::ProfileIsCS = ProfileIsCS; return sampleprof_error::success; } @@ -887,7 +892,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { if (std::error_code EC = Checksum.getError()) return EC; - Profiles[*FName].setFunctionHash(*Checksum); + SampleContext FContext(*FName); + Profiles[FContext].setFunctionHash(*Checksum); } return sampleprof_error::success; } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp index 71dba6281f76..d3bc05e06fdf 100644 --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -147,7 +147,7 @@ std::error_code SampleProfileWriterExtBinaryBase::write( std::error_code SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) { uint64_t Offset = OutputStream->tell(); - StringRef Name = S.getName(); + StringRef Name = S.getNameWithContext(true); FuncOffsetTable[Name] = Offset - SecLBRProfileStart; encodeULEB128(S.getHeadSamples(), *OutputStream); return writeBody(S); @@ -635,7 +635,7 @@ std::error_code 
SampleProfileWriterBinary::writeSummary() { std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { auto &OS = *OutputStream; - if (std::error_code EC = writeNameIdx(S.getName())) + if (std::error_code EC = writeNameIdx(S.getNameWithContext(true))) return EC; encodeULEB128(S.getTotalSamples(), OS); diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 37fc27e91100..660d79de667c 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -179,7 +179,7 @@ SampleContextTracker::SampleContextTracker( SampleContext Context(FuncSample.first(), RawContext); LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n"); if (!Context.isBaseContext()) - FuncToCtxtProfileSet[Context.getName()].insert(FSamples); + FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples); ContextTrieNode *NewNode = getOrCreateContextPath(Context, true); assert(!NewNode->getFunctionSamples() && "New node can't have sample profile"); diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll index ed32c2a0027b..adda7022047d 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll @@ -1,18 +1,22 @@ ; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly ; based on inline decision, so post inline counts are accurate. 
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t + ; Note that we need new pass manager to enable top-down processing for sample profile loader ; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; main:3 @ _Z5funcAi ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL ; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; main:3 @ _Z5funcAi ; _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT @factor = dso_local global i32 3, align 4, !dbg !0 diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext new file mode 100644 index 000000000000..eead4d4d62f0 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext @@ -0,0 +1,36 @@ +[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11 + 0: 6 + 1: 6 + 3: 287884 + 4: 287864 _Z3fibi:315608 + 15: 23 +[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20 + 0: 15 + 1: 15 + 3: 74946 + 4: 74941 _Z3fibi:82359 + 10: 23324 + 11: 23327 _Z3fibi:25228 + 15: 11 +[main]:154:0 + 2: 12 + 3: 18 _Z5funcAi:11 + 3.1: 18 _Z5funcBi:19 +[external:12 @ main]:154:12 + 2: 12 + 3: 10 _Z5funcAi:7 + 3.1: 10 _Z5funcBi:11 +[main:3.1 @ 
_Z5funcBi]:120:19 + 0: 19 + 1: 19 _Z8funcLeafi:20 + 3: 12 +[externalA:17 @ _Z5funcBi]:120:3 + 0: 3 + 1: 3 +[external:10 @ _Z5funcBi]:120:10 + 0: 10 + 1: 10 +[main:3 @ _Z5funcAi]:99:11 + 0: 10 + 1: 10 _Z8funcLeafi:11 + 3: 24 diff --git a/llvm/test/tools/llvm-profdata/cs-sample-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-profile.test new file mode 100644 index 000000000000..04c573ddece3 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/cs-sample-profile.test @@ -0,0 +1,4 @@ +RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext +RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext +RUN: llvm-profdata merge --sample --extbinary %p/Inputs/cs-sample.proftext -o %t.prof && llvm-profdata merge --sample --text %t.prof -o %t1.proftext +RUN: diff -b %t1.proftext %S/Inputs/cs-sample.proftext diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 8dc43924c067..7e53c30c7579 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -696,7 +696,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper, Remapper ? remapSamples(I->second, *Remapper, Result) : FunctionSamples(); FunctionSamples &Samples = Remapper ? 
Remapped : I->second; - StringRef FName = Samples.getName(); + StringRef FName = Samples.getNameWithContext(true); MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight)); if (Result != sampleprof_error::success) { std::error_code EC = make_error_code(Result); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 265beccb84a8..7624fd3f2808 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -164,7 +164,7 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr) { if (Ret.second) { SampleContext FContext(Ret.first->first(), RawContext); FunctionSamples &FProfile = Ret.first->second; - FProfile.setName(FContext.getName()); + FProfile.setName(FContext.getNameWithoutContext()); FProfile.setContext(FContext); } return Ret.first->second; From f2cabaac9525ba4b86301136e21ec9aad6aaf326 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Sun, 31 Jan 2021 22:31:51 -0800 Subject: [PATCH 053/244] [CSSPGO] Tweaking inlining with pseudo probes. Fixing up a couple places where `getCallSiteIdentifier` is needed to support pseudo-probe-based callsites. Also fixing an issue in the extbinary profile reader where the metadata section is not fully scanned based on the number of profiles loaded only for the current module. 
Reviewed By: wmi, wenlei Differential Revision: https://reviews.llvm.org/D95791 (cherry picked from commit 224fee8219bb3aed34f13ce40935e1b3ede90a0f) --- llvm/lib/ProfileData/SampleProfReader.cpp | 9 +- .../Transforms/IPO/SampleContextTracker.cpp | 11 +- .../Inputs/pseudo-probe-inline.prof | 18 ++ .../SampleProfile/pseudo-probe-inline.ll | 175 ++++++++++++++++++ 4 files changed, 204 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index c9f41687c356..370ffc8e2885 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -883,7 +883,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) { std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { if (!ProfileIsProbeBased) return sampleprof_error::success; - for (unsigned I = 0; I < Profiles.size(); ++I) { + while (Data < End) { auto FName(readStringFromTable()); if (std::error_code EC = FName.getError()) return EC; @@ -893,8 +893,13 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { return EC; SampleContext FContext(*FName); - Profiles[FContext].setFunctionHash(*Checksum); + // No need to load metadata for profiles that are not loaded in the current + // module. 
+ if (Profiles.count(FContext)) + Profiles[FContext].setFunctionHash(*Checksum); } + + assert(Data == End && "More data is read than expected"); return sampleprof_error::success; } diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index 660d79de667c..fad72985dedd 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -308,8 +308,7 @@ void SampleContextTracker::promoteMergeContextSamplesTree( return; // Get the context that needs to be promoted - LineLocation CallSite(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()); + LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); ContextTrieNode *NodeToPromo = CallerNode->getChildContext(CallSite, CalleeName); if (!NodeToPromo) @@ -370,9 +369,7 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL, return nullptr; return CallContext->getChildContext( - LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()), - CalleeName); + FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); } ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { @@ -386,8 +383,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) { if (Name.empty()) Name = PrevDIL->getScope()->getSubprogram()->getName(); S.push_back( - std::make_pair(LineLocation(FunctionSamples::getOffset(DIL), - DIL->getBaseDiscriminator()), Name)); + std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); PrevDIL = DIL; } diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof new file mode 100644 index 000000000000..fd3ff773e85d --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof @@ -0,0 +1,18 @@ +[foo]:23:23 + 1: 23 + 2: 23 zen:23 + !CFGChecksum: 
281479271677951 +[foo:2 @ zen]:765858:23 + 1: 23 + 2: 382920 + 3: 382915 + !CFGChecksum: 138828622701 +[bar]:23:23 + 1: 23 + 2: 23 zen:23 + !CFGChecksum: 281479271677951 +[bar:2 @ zen]:765858:23 + 1: 23 + 2: 382920 + 3: 382915 + !CFGChecksum: 138828622701 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll new file mode 100644 index 000000000000..a5033a0dc190 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -0,0 +1,175 @@ +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s +; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml + +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-inline.prof -o %t2 +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s +; RUN: FileCheck %s -check-prefix=YAML < %t2.opt.yaml + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@factor = dso_local global i32 3, align 4 + +define dso_local i32 @foo(i32 %x) #0 !dbg !12 { +entry: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0) + %add = add nsw i32 %x, 100000, !dbg !19 +;; Check zen is fully inlined so there's no call to zen anymore. +;; Check code from the inlining of zen is properly annotated here. 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK-NOT: call i32 @zen + %call = call i32 @zen(i32 %add), !dbg !20 + ret i32 %call, !dbg !21 +} + +; CHECK: define dso_local i32 @zen +define dso_local i32 @zen(i32 %x) #0 !dbg !22 { +entry: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0) + %cmp = icmp sgt i32 %x, 0, !dbg !26 + br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28 + +while.cond: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) + %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ] + %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29 + br i1 %cmp1, label %while.body, label %if.end, !dbg !31 + +while.body: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) + %0 = load volatile i32, i32* @factor, align 4, !dbg !32 + %sub = sub nsw i32 %x.addr.0, %0, !dbg !39 + br label %while.cond, !dbg !31 + +while.cond2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) + %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ] + %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !44 + +while.body4: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) + %1 = load volatile i32, i32* @factor, align 4, !dbg !45 + %add = add nsw i32 %x.addr.1, %1, !dbg !48 + br label %while.cond2, !dbg !44 + +if.end: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) + 
%x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ] + ret i32 %x.addr.2, !dbg !51 +} + +; CHECK: !llvm.pseudo_probe_desc = !{![[#DESC0:]], ![[#DESC1:]]} +; CHECK: ![[#DESC0]] = !{i64 [[#GUID1]], i64 [[#HASH1:]], !"foo"} +; CHECK: ![[#DESC1]] = !{i64 [[#GUID2]], i64 [[#HASH2:]], !"zen"} +; CHECK: ![[PD1]] = !{!"branch_weights", i32 25, i32 1} +; CHECK: ![[PD2]] = !{!"branch_weights", i32 382916, i32 25} + +; Checking to see if YAML file is generated and contains remarks +;YAML: --- !Passed +;YAML-NEXT: Pass: sample-profile-inline +;YAML-NEXT: Name: Inlined +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 10, Column: 11 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - Callee: zen +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 38, Column: 0 } +;YAML-NEXT: - String: ' inlined into ' +;YAML-NEXT: - Caller: foo +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 9, Column: 0 } +;YAML-NEXT: - String: ' to match profiling context' +;YAML-NEXT: - String: ' with ' +;YAML-NEXT: - String: '(cost=' +;YAML-NEXT: - Cost: '15' +;YAML-NEXT: - String: ', threshold=' +;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - String: ')' +;YAML-NEXT: - String: ' at callsite ' +;YAML-NEXT: - String: foo +;YAML-NEXT: - String: ':' +;YAML-NEXT: - Line: '1' +;YAML-NEXT: - String: ':' +;YAML-NEXT: - Column: '11' +;YAML-NEXT: - String: ';' +;YAML-NEXT: ... +;YAML: --- !Analysis +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: AppliedSamples +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 10, Column: 22 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'Applied ' +;YAML-NEXT: - NumSamples: '23' +;YAML-NEXT: - String: ' samples from profile (ProbeId=' +;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ')' +;YAML-NEXT: ... 
+;YAML: --- !Analysis +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: AppliedSamples +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 39, Column: 9 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'Applied ' +;YAML-NEXT: - NumSamples: '23' +;YAML-NEXT: - String: ' samples from profile (ProbeId=' +;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ')' +;YAML-NEXT: ... +;YAML: --- !Analysis +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: AppliedSamples +;YAML-NEXT: DebugLoc: { File: test.cpp, Line: 41, Column: 14 } +;YAML-NEXT: Function: foo +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'Applied ' +;YAML-NEXT: - NumSamples: '382920' +;YAML-NEXT: - String: ' samples from profile (ProbeId=' +;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ')' +;YAML-NEXT: ... + +attributes #0 = {"use-sample-profile"} + +!llvm.module.flags = !{!8, !9} + +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3) +!3 = !DIFile(filename: "test.cpp", directory: "test") +!4 = !{} +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{i32 7, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!12 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 9, type: !13, scopeLine: 9, unit: !2) +!13 = !DISubroutineType(types: !14) +!14 = !{!7, !7} +!18 = !DILocation(line: 0, scope: !12) +!19 = !DILocation(line: 10, column: 22, scope: !12) +!20 = !DILocation(line: 10, column: 11, scope: !12) +!21 = !DILocation(line: 12, column: 3, scope: !12) +!22 = distinct !DISubprogram(name: "zen", scope: !3, file: !3, line: 37, type: !13, scopeLine: 38, unit: !2) +!25 = !DILocation(line: 0, scope: !22) +!26 = !DILocation(line: 39, column: 9, scope: !27) +!27 = distinct !DILexicalBlock(scope: !22, file: !3, line: 39, column: 7) +!28 = !DILocation(line: 39, column: 7, scope: !22) +!29 = !DILocation(line: 41, column: 14, scope: !30) +!30 = distinct !DILexicalBlock(scope: !27, file: !3, line: 39, column: 14) +!31 = 
!DILocation(line: 41, column: 5, scope: !30) +!32 = !DILocation(line: 42, column: 16, scope: !33) +!33 = distinct !DILexicalBlock(scope: !30, file: !3, line: 41, column: 19) +!38 = !DILocation(line: 42, column: 12, scope: !33) +!39 = !DILocation(line: 42, column: 9, scope: !33) +!42 = !DILocation(line: 48, column: 14, scope: !43) +!43 = distinct !DILexicalBlock(scope: !27, file: !3, line: 46, column: 8) +!44 = !DILocation(line: 48, column: 5, scope: !43) +!45 = !DILocation(line: 49, column: 16, scope: !46) +!46 = distinct !DILexicalBlock(scope: !43, file: !3, line: 48, column: 19) +!47 = !DILocation(line: 49, column: 12, scope: !46) +!48 = !DILocation(line: 49, column: 9, scope: !46) +!51 = !DILocation(line: 53, column: 3, scope: !22) From b9fa16f2234edddf6e0f449a0e7b646ee9046cf3 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Fri, 22 Jan 2021 15:52:46 -0800 Subject: [PATCH 054/244] [CSSPGO] Passing the clang driver switch -fpseudo-probe-for-profiling to the linker. As titled. Reviewed By: wmi, wenlei Differential Revision: https://reviews.llvm.org/D95271 (cherry picked from commit d3e2e3740d0730cb6788c771bb01a8f3e935bf2e) --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 +++++ clang/test/Driver/pseudo-probe-lto.c | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/pseudo-probe-lto.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 42c5319041d0..1f6c13d5cc96 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1147,7 +1147,7 @@ def fprofile_update_EQ : Joined<["-"], "fprofile-update=">, defm pseudo_probe_for_profiling : BoolFOption<"pseudo-probe-for-profiling", CodeGenOpts<"PseudoProbeForProfiling">, DefaultFalse, PosFlag, NegFlag, - BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiler">>; + BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample 
profiling">>; def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Generate instrumented code to collect order file into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 6a95aa5ec628..bcaea71dca94 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -605,6 +605,11 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, CmdArgs.push_back("-plugin-opt=new-pass-manager"); } + // Pass an option to enable pseudo probe emission. + if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling, + options::OPT_fno_pseudo_probe_for_profiling, false)) + CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling"); + // Setup statistics file output. SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D); if (!StatsFile.empty()) diff --git a/clang/test/Driver/pseudo-probe-lto.c b/clang/test/Driver/pseudo-probe-lto.c new file mode 100644 index 000000000000..e319b8c0098b --- /dev/null +++ b/clang/test/Driver/pseudo-probe-lto.c @@ -0,0 +1,10 @@ +// RUN: touch %t.o +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto=thin -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fno-pseudo-probe-for-profiling -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 2>&1 | FileCheck %s --check-prefix=NOPROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fno-pseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=NOPROBE +// RUN: %clang -### %t.o -target x86_64-unknown-linux 
-flto -fpseudo-probe-for-profiling -fno-pseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=NOPROBE + +// PROBE: -plugin-opt=pseudo-probe-for-profiling +// NOPROBE-NOT: -plugin-opt=pseudo-probe-for-profiling From 27ff658e97528540e4425c0cb6400f3e5355f53a Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Sun, 3 Jan 2021 16:43:06 -0800 Subject: [PATCH 055/244] [CSSPGO] Call site prioritized inlining for sample PGO This change implemented call site prioritized BFS profile guided inlining for sample profile loader. The new inlining strategy maximize the benefit of context-sensitive profile as mentioned in the follow up discussion of CSSPGO RFC. The change will not affect today's AutoFDO as it's opt-in. CSSPGO now defaults to the new FDO inliner, but can fall back to today's replay inliner using a switch (`-sample-profile-prioritized-inline=0`). Motivation With baseline AutoFDO, the inliner in sample profile loader only replays previous inlining, and the use of profile is only for pruning previous inlining that turned out to be cold. Due to the nature of replay, the FDO inliner is simple with hotness being the only decision factor. It has the following limitations that we're improving now for CSSPGO. - It doesn't take inline candidate size into account. Since it's doing replay, the size growth is bounded by previous CGSCC inlining. With context-sensitive profile, FDO inliner is no longer limited by previous inlining, so we need to take size into account to avoid significant size bloat. - The way it looks at hotness is not accurate. It uses total samples in an inlinee as proxy for hotness, while what really matters for an inline decision is the call site count. This is an unfortunate fall back because call site count and callee entry count are not reliable due to dwarf based correlation, especially for inlinees. Now paired with pseudo-probe, we have accurate call site count and callee's entry count, so we can use that to gauge hotness more accurately. 
- It treats all call sites from a block as hot as long as there's one call site considered hot. This is normally true, but since total samples is used as hotness proxy, this transitiveness within block magnifies the inaccurate hotness heuristic. With pseudo-probe and the change above, this is no longer an issue for CSSPGO. New FDO Inliner Putting all the requirements for CSSPGO together, we need a top-down call site prioritized BFS inliner. Here're reasons why each component is needed. - Top-down: We need a top-down inliner to better leverage context-sensitive profile, so inlining is driven by accurate context profile, and post-inline is also accurate. This is already implemented in https://reviews.llvm.org/D70655. - Size Cap: For top-down inliner, taking function size into account for inline decision alone isn't sufficient to control size growth. We also need to explicitly cap size growth because with top-down inlining, we can grow inliner size significantly with large number of smaller inlinees even if each individually passes the cost/size check. - Prioritize call sites: With size cap, inlining order also becomes important, because if we stop inlining due to size budget limit, we'd want to use budget towards the most beneficial call sites. - BFS inline: Same as call site prioritization, if we stop inlining due to size budget limit, we want a balanced inline tree, rather than going deep on one call path. Note that the new inliner avoids repeatedly evaluating the same set of call sites, so it should help with compile time too. For this reason, we could transition today's FDO inliner to use a queue with equal priority to avoid wasted reevaluation of same call site (TODO). Speculative indirect call promotion and inlining is also supported now with CSSPGO just like baseline AutoFDO. Tunings and knobs I created tuning knobs for size growth/cap control, and for hot threshold separate from CGSCC inliner. The default values are selected based on initial tuning with CSSPGO.
Results Evaluated with an internal LLVM fork couple months ago, plus another change to adjust hot-threshold cutoff for context profile (will send up after this one), the new inliner show ~1% geomean perf win on spec2006 with CSSPGO, while reducing code size too. The measurement was done using train-train setup, MonoLTO w/ new pass manager and pseudo-probe. Note that this is just a starting point - we hope that the new inliner will open up more opportunity with CSSPGO, but it will certainly take more time and effort to make it fully calibrated and ready for bigger workloads (we're working on it). Differential Revision: https://reviews.llvm.org/D94001 (cherry picked from commit 6bae5973c476e16dbbc82030d65c7859a6628e89) --- .../Transforms/IPO/SampleContextTracker.h | 6 +- .../Transforms/IPO/SampleContextTracker.cpp | 73 ++- llvm/lib/Transforms/IPO/SampleProfile.cpp | 443 ++++++++++++++++-- .../Inputs/indirect-call-csspgo.prof | 10 + .../SampleProfile/csspgo-inline-debug.ll | 166 +++++++ .../SampleProfile/csspgo-inline-icall.ll | 63 +++ .../Transforms/SampleProfile/csspgo-inline.ll | 180 +++++++ .../profile-context-tracker-debug.ll | 25 +- .../SampleProfile/profile-context-tracker.ll | 15 +- .../SampleProfile/pseudo-probe-inline.ll | 4 +- 10 files changed, 904 insertions(+), 81 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline.ll diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h index 5b2600144fa3..526e141838c4 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -23,6 +23,7 @@ #include "llvm/ProfileData/SampleProf.h" #include #include +#include 
using namespace llvm; using namespace sampleprof; @@ -42,7 +43,7 @@ class ContextTrieNode { CallSiteLoc(CallLoc){}; ContextTrieNode *getChildContext(const LineLocation &CallSite, StringRef CalleeName); - ContextTrieNode *getChildContext(const LineLocation &CallSite); + ContextTrieNode *getHottestChildContext(const LineLocation &CallSite); ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate = true); @@ -94,6 +95,9 @@ class SampleContextTracker { // call-site. The full context is identified by location of call instruction. FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, StringRef CalleeName); + // Get samples for indirect call targets for call site at given location. + std::vector + getIndirectCalleeContextSamplesFor(const DILocation *DIL); // Query context profile for a given location. The full context // is identified by input DILocation. FunctionSamples *getContextSamplesFor(const DILocation *DIL); diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp index fad72985dedd..41d7f363e1a4 100644 --- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp +++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp @@ -30,7 +30,7 @@ namespace llvm { ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, StringRef CalleeName) { if (CalleeName.empty()) - return getChildContext(CallSite); + return getHottestChildContext(CallSite); uint32_t Hash = nodeHash(CalleeName, CallSite); auto It = AllChildContext.find(Hash); @@ -40,18 +40,22 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite, } ContextTrieNode * -ContextTrieNode::getChildContext(const LineLocation &CallSite) { +ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) { // CSFDO-TODO: This could be slow, change AllChildContext so we can // do point look up for child node by call site alone. 
- // CSFDO-TODO: Return the child with max count for indirect call + // Retrieve the child node with max count for indirect call ContextTrieNode *ChildNodeRet = nullptr; + uint64_t MaxCalleeSamples = 0; for (auto &It : AllChildContext) { ContextTrieNode &ChildNode = It.second; - if (ChildNode.CallSiteLoc == CallSite) { - if (ChildNodeRet) - return nullptr; - else - ChildNodeRet = &ChildNode; + if (ChildNode.CallSiteLoc != CallSite) + continue; + FunctionSamples *Samples = ChildNode.getFunctionSamples(); + if (!Samples) + continue; + if (Samples->getTotalSamples() > MaxCalleeSamples) { + ChildNodeRet = &ChildNode; + MaxCalleeSamples = Samples->getTotalSamples(); } } @@ -191,12 +195,12 @@ FunctionSamples * SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, StringRef CalleeName) { LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n"); - // CSFDO-TODO: We use CalleeName to differentiate indirect call - // We need to get sample for indirect callee too. DILocation *DIL = Inst.getDebugLoc(); if (!DIL) return nullptr; + // For indirect call, CalleeName will be empty, in which case the context + // profile for callee with largest total samples will be returned. 
ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName); if (CalleeContext) { FunctionSamples *FSamples = CalleeContext->getFunctionSamples(); @@ -209,6 +213,26 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst, return nullptr; } +std::vector +SampleContextTracker::getIndirectCalleeContextSamplesFor( + const DILocation *DIL) { + std::vector R; + if (!DIL) + return R; + + ContextTrieNode *CallerNode = getContextFor(DIL); + LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); + for (auto &It : CallerNode->getAllChildContext()) { + ContextTrieNode &ChildNode = It.second; + if (ChildNode.getCallSiteLoc() != CallSite) + continue; + if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples()) + R.push_back(CalleeSamples); + } + + return R; +} + FunctionSamples * SampleContextTracker::getContextSamplesFor(const DILocation *DIL) { assert(DIL && "Expect non-null location"); @@ -295,11 +319,6 @@ void SampleContextTracker::promoteMergeContextSamplesTree( const Instruction &Inst, StringRef CalleeName) { LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n" << Inst << "\n"); - // CSFDO-TODO: We also need to promote context profile from indirect - // calls. We won't have callee names from those from call instr. - if (CalleeName.empty()) - return; - // Get the caller context for the call instruction, we don't use callee // name from call because there can be context from indirect calls too. DILocation *DIL = Inst.getDebugLoc(); @@ -309,6 +328,22 @@ void SampleContextTracker::promoteMergeContextSamplesTree( // Get the context that needs to be promoted LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL); + // For indirect call, CalleeName will be empty, in which case we need to + // promote all non-inlined child context profiles. 
+ if (CalleeName.empty()) { + for (auto &It : CallerNode->getAllChildContext()) { + ContextTrieNode *NodeToPromo = &It.second; + if (CallSite != NodeToPromo->getCallSiteLoc()) + continue; + FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples(); + if (FromSamples && FromSamples->getContext().hasState(InlinedContext)) + continue; + promoteMergeContextSamplesTree(*NodeToPromo); + } + return; + } + + // Get the context for the given callee that needs to be promoted ContextTrieNode *NodeToPromo = CallerNode->getChildContext(CallSite, CalleeName); if (!NodeToPromo) @@ -328,6 +363,8 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree( LLVM_DEBUG(dbgs() << " Found context tree root to promote: " << FromSamples->getContext() << "\n"); + assert(!FromSamples->getContext().hasState(InlinedContext) && + "Shouldn't promote inlined context profile"); StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext(); return promoteMergeContextSamplesTree(NodeToPromo, RootContext, ContextStrToRemove); @@ -360,14 +397,12 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL, StringRef CalleeName) { assert(DIL && "Expect non-null location"); - // CSSPGO-TODO: need to support indirect callee - if (CalleeName.empty()) - return nullptr; - ContextTrieNode *CallContext = getContextFor(DIL); if (!CallContext) return nullptr; + // When CalleeName is empty, the child context profile with max + // total samples will be returned. 
return CallContext->getChildContext( FunctionSamples::getCallSiteIdentifier(DIL), CalleeName); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 264ac4065e8c..665c4078f3ee 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -108,6 +109,14 @@ STATISTIC(NumMismatchedProfile, "Number of functions with CFG mismatched profile"); STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile"); +STATISTIC(NumCSInlinedHitMinLimit, + "Number of functions with FDO inline stopped due to min size limit"); +STATISTIC(NumCSInlinedHitMaxLimit, + "Number of functions with FDO inline stopped due to max size limit"); +STATISTIC( + NumCSInlinedHitGrowthLimit, + "Number of functions with FDO inline stopped due to growth size limit"); + // Command line option to specify the file to read samples from. This is // mainly used for debugging. 
static cl::opt SampleProfileFile( @@ -171,6 +180,38 @@ static cl::opt ProfileSizeInline( cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size.")); +static cl::opt ProfileInlineGrowthLimit( + "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12), + cl::desc("The size growth ratio limit for proirity-based sample profile " + "loader inlining.")); + +static cl::opt ProfileInlineLimitMin( + "sample-profile-inline-limit-min", cl::Hidden, cl::init(100), + cl::desc("The lower bound of size growth limit for " + "proirity-based sample profile loader inlining.")); + +static cl::opt ProfileInlineLimitMax( + "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000), + cl::desc("The upper bound of size growth limit for " + "proirity-based sample profile loader inlining.")); + +static cl::opt ProfileICPThreshold( + "sample-profile-icp-threshold", cl::Hidden, cl::init(5), + cl::desc( + "Relative hotness threshold for indirect " + "call promotion in proirity-based sample profile loader inlining.")); + +static cl::opt SampleHotCallSiteThreshold( + "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000), + cl::desc("Hot callsite threshold for proirity-based sample profile loader " + "inlining.")); + +static cl::opt CallsitePrioritizedInline( + "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Use call site prioritized inlining for sample profile loader." 
+ "Currently only CSSPGO is supported.")); + static cl::opt SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); @@ -313,6 +354,31 @@ class GUIDToFuncNameMapper { DenseMap &CurrentGUIDToFuncNameMap; }; +// Inline candidate used by iterative callsite prioritized inliner +struct InlineCandidate { + CallBase *CallInstr; + const FunctionSamples *CalleeSamples; + uint64_t CallsiteCount; +}; + +// Inline candidate comparer using call site weight +struct CandidateComparer { + bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) { + if (LHS.CallsiteCount != RHS.CallsiteCount) + return LHS.CallsiteCount < RHS.CallsiteCount; + + // Tie breaker using GUID so we have stable/deterministic inlining order + assert(LHS.CalleeSamples && RHS.CalleeSamples && + "Expect non-null FunctionSamples"); + return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) < + RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName()); + } +}; + +using CandidateQueue = + PriorityQueue, + CandidateComparer>; + /// Sample profile pass. /// /// This pass reads profile data from the file specified by @@ -350,9 +416,23 @@ class SampleProfileLoader { findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineCallInstruction(CallBase &CB); + CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName, + uint64_t &Sum, uint64_t Count, CallBase *I, + const char *&Reason); + bool inlineCallInstruction(CallBase &CB, + const FunctionSamples *CalleeSamples); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); + // Helper functions call-site prioritized BFS inliner + // Will change the main FDO inliner to be work list based directly in + // upstream, then merge this change with that and remove the duplication. 
+ InlineCost shouldInlineCandidate(InlineCandidate &Candidate); + bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); + bool tryInlineCandidate(InlineCandidate &Candidate, + SmallVector &InlinedCallSites); + bool + inlineHotFunctionsWithPriority(Function &F, + DenseSet &InlinedGUIDs); // Inline cold/small functions in addition to hot ones bool shouldInlineColdCallee(CallBase &CallInst); void emitOptimizationRemarksForInlineCandidates( @@ -918,6 +998,31 @@ SampleProfileLoader::findIndirectCallFunctionSamples( return R; } + auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { + assert(L && R && "Expect non-null FunctionSamples"); + if (L->getEntrySamples() != R->getEntrySamples()) + return L->getEntrySamples() > R->getEntrySamples(); + return FunctionSamples::getGUID(L->getName()) < + FunctionSamples::getGUID(R->getName()); + }; + + if (ProfileIsCS) { + auto CalleeSamples = + ContextTracker->getIndirectCalleeContextSamplesFor(DIL); + if (CalleeSamples.empty()) + return R; + + // For CSSPGO, we only use target context profile's entry count + // as that already includes both inlined callee and non-inlined ones.. 
+ Sum = 0; + for (const auto *const FS : CalleeSamples) { + Sum += FS->getEntrySamples(); + R.push_back(FS); + } + llvm::sort(R, FSCompare); + return R; + } + const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return R; @@ -935,12 +1040,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( Sum += NameFS.second.getEntrySamples(); R.push_back(&NameFS.second); } - llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) { - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); - return FunctionSamples::getGUID(L->getName()) < - FunctionSamples::getGUID(R->getName()); - }); + llvm::sort(R, FSCompare); } return R; } @@ -977,7 +1077,32 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { +CallBase * +SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName, + uint64_t &Sum, uint64_t Count, + CallBase *I, const char *&Reason) { + Reason = "Callee function not available"; + // R->getValue() != &F is to prevent promoting a recursive call. + // If it is a recursive call, we do not inline it as it could bloat + // the code exponentially. There is way to better handle this, e.g. + // clone the caller first, and inline the cloned caller if it is + // recursive. As llvm does not inline recursive calls, we will + // simply ignore it instead of handling it explicitly. 
+ auto R = SymbolMap.find(CalleeName); + if (R != SymbolMap.end() && R->getValue() && + !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && + R->getValue()->hasFnAttribute("use-sample-profile") && + R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) { + auto *DI = + &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE); + Sum -= Count; + return DI; + } + return nullptr; +} + +bool SampleProfileLoader::inlineCallInstruction( + CallBase &CB, const FunctionSamples *CalleeSamples) { if (ExternalInlineAdvisor) { auto Advice = ExternalInlineAdvisor->getAdvice(CB); if (!Advice->isInliningRecommended()) { @@ -1012,6 +1137,9 @@ bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { // The call to InlineFunction erases I, so we can't pass it here. emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, true, CSINLINE_DEBUG); + if (ProfileIsCS) + ContextTracker->markContextSamplesInlined(CalleeSamples); + ++NumCSInlined; return true; } return false; @@ -1129,34 +1257,17 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI)) continue; - const char *Reason = "Callee function not available"; - // R->getValue() != &F is to prevent promoting a recursive call. - // If it is a recursive call, we do not inline it as it could bloat - // the code exponentially. There is way to better handle this, e.g. - // clone the caller first, and inline the cloned caller if it is - // recursive. As llvm does not inline recursive calls, we will - // simply ignore it instead of handling it explicitly. 
+ const char *Reason = nullptr; auto CalleeFunctionName = FS->getFuncName(); - auto R = SymbolMap.find(CalleeFunctionName); - if (R != SymbolMap.end() && R->getValue() && - !R->getValue()->isDeclaration() && - R->getValue()->getSubprogram() && - R->getValue()->hasFnAttribute("use-sample-profile") && - R->getValue() != &F && - isLegalToPromote(*I, R->getValue(), &Reason)) { - uint64_t C = FS->getEntrySamples(); - auto &DI = - pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE); - Sum -= C; + if (CallBase *DI = + tryPromoteIndirectCall(F, CalleeFunctionName, Sum, + FS->getEntrySamples(), I, Reason)) { PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && - inlineCallInstruction(cast(DI))) { - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined(FS); + inlineCallInstruction(cast(*DI), FS)) { localNotInlinedCallSites.erase(I); LocalChanged = true; - ++NumCSInlined; } } else { LLVM_DEBUG(dbgs() @@ -1166,13 +1277,11 @@ bool SampleProfileLoader::inlineHotFunctions( } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(*I)) { - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined( - localNotInlinedCallSites[I]); + if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I) + ? 
localNotInlinedCallSites[I] + : nullptr)) { localNotInlinedCallSites.erase(I); LocalChanged = true; - ++NumCSInlined; } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { findCalleeFunctionSamples(*I)->findInlinedFunctions( @@ -1186,6 +1295,11 @@ bool SampleProfileLoader::inlineHotFunctions( } } + // For CS profile, profile for not inlined context will be merged when + // base profile is being trieved + if (ProfileIsCS) + return Changed; + // Accumulate not inlined callsite information into notInlinedSamples for (const auto &Pair : localNotInlinedCallSites) { CallBase *I = Pair.getFirst(); @@ -1232,6 +1346,254 @@ bool SampleProfileLoader::inlineHotFunctions( return Changed; } +bool SampleProfileLoader::tryInlineCandidate( + InlineCandidate &Candidate, SmallVector &InlinedCallSites) { + + CallBase &CB = *Candidate.CallInstr; + Function *CalledFunction = CB.getCalledFunction(); + assert(CalledFunction && "Expect a callee with definition"); + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *BB = CB.getParent(); + + InlineCost Cost = shouldInlineCandidate(Candidate); + if (Cost.isNever()) { + ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) + << "incompatible inlining"); + return false; + } + + if (!Cost) + return false; + + InlineFunctionInfo IFI(nullptr, GetAC); + if (InlineFunction(CB, IFI).isSuccess()) { + // The call to InlineFunction erases I, so we can't pass it here. + emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, + true, CSINLINE_DEBUG); + + // Now populate the list of newly exposed call sites. 
+ InlinedCallSites.clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites.push_back(I); + + if (ProfileIsCS) + ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); + ++NumCSInlined; + return true; + } + return false; +} + +bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, + CallBase *CB) { + assert(CB && "Expect non-null call instruction"); + + if (isa(CB)) + return false; + + // Find the callee's profile. For indirect call, find hottest target profile. + const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB); + if (!CalleeSamples) + return false; + + uint64_t CallsiteCount = 0; + ErrorOr Weight = getBlockWeight(CB->getParent()); + if (Weight) + CallsiteCount = Weight.get(); + if (CalleeSamples) + CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples()); + + *NewCandidate = {CB, CalleeSamples, CallsiteCount}; + return true; +} + +InlineCost +SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { + assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); + + std::unique_ptr Advice = nullptr; + if (ExternalInlineAdvisor) { + Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + return InlineCost::getNever("not previously inlined"); + } + Advice->recordInlining(); + return InlineCost::getAlways("previously inlined"); + } + + // Adjust threshold based on call site hotness, only do this for callsite + // prioritized inliner because otherwise cost-benefit check is done earlier. 
+ int SampleThreshold = SampleColdCallSiteThreshold; + if (CallsitePrioritizedInline) { + if (Candidate.CallsiteCount > PSI->getHotCountThreshold()) + SampleThreshold = SampleHotCallSiteThreshold; + else if (!ProfileSizeInline) + return InlineCost::getNever("cold callsite"); + } + + Function *Callee = Candidate.CallInstr->getCalledFunction(); + assert(Callee && "Expect a definition for inline candidate of direct call"); + + InlineParams Params = getInlineParams(); + Params.ComputeFullInlineCost = true; + // Checks if there is anything in the reachable portion of the callee at + // this callsite that makes this inlining potentially illegal. Need to + // set ComputeFullInlineCost, otherwise getInlineCost may return early + // when cost exceeds threshold without checking all IRs in the callee. + // The acutal cost does not matter because we only checks isNever() to + // see if it is legal to inline the callsite. + InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, + GetTTI(*Callee), GetAC, GetTLI); + + // For old FDO inliner, we inline the call site as long as cost is not + // "Never". The cost-benefit check is done earlier. + if (!CallsitePrioritizedInline) { + if (Cost.isNever()) + return Cost; + return InlineCost::getAlways("hot callsite previously inlined"); + } + + // Honor always inline and never inline from call analyzer + if (Cost.isNever() || Cost.isAlways()) + return Cost; + + // Otherwise only use the cost from call analyzer, but overwite threshold with + // Sample PGO threshold. + return InlineCost::get(Cost.getCost(), SampleThreshold); +} + +bool SampleProfileLoader::inlineHotFunctionsWithPriority( + Function &F, DenseSet &InlinedGUIDs) { + DenseSet PromotedInsns; + assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); + + // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure + // Profile symbol list is ignored when profile-sample-accurate is on. 
+ assert((!ProfAccForSymsInList || + (!ProfileSampleAccurate && + !F.hasFnAttribute("profile-sample-accurate"))) && + "ProfAccForSymsInList should be false when profile-sample-accurate " + "is enabled"); + + // Populating worklist with initial call sites from root inliner, along + // with call site weights. + CandidateQueue CQueue; + InlineCandidate NewCandidate; + for (auto &BB : F) { + for (auto &I : BB.getInstList()) { + auto *CB = dyn_cast(&I); + if (!CB) + continue; + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.push(NewCandidate); + } + } + + // Cap the size growth from profile guided inlining. This is needed even + // though cost of each inline candidate already accounts for callee size, + // because with top-down inlining, we can grow inliner size significantly + // with large number of smaller inlinees each pass the cost check. + assert(ProfileInlineLimitMax >= ProfileInlineLimitMin && + "Max inline size limit should not be smaller than min inline size " + "limit."); + unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit; + SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax); + SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin); + if (ExternalInlineAdvisor) + SizeLimit = std::numeric_limits::max(); + + // Perform iterative BFS call site prioritized inlining + bool Changed = false; + while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) { + InlineCandidate Candidate = CQueue.top(); + CQueue.pop(); + CallBase *I = Candidate.CallInstr; + Function *CalledFunction = I->getCalledFunction(); + + if (CalledFunction == &F) + continue; + if (I->isIndirectCall()) { + if (PromotedInsns.count(I)) + continue; + uint64_t Sum; + auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); + uint64_t SumOrigin = Sum; + for (const auto *FS : CalleeSamples) { + // TODO: Consider disable pre-lTO ICP for MonoLTO as well + if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { + 
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), + PSI->getOrCompHotCountThreshold()); + continue; + } + uint64_t EntryCountDistributed = FS->getEntrySamples(); + // In addition to regular inline cost check, we also need to make sure + // ICP isn't introducing excessive speculative checks even if individual + // target looks beneficial to promote and inline. That means we should + // only do ICP when there's a small number dominant targets. + if (EntryCountDistributed < SumOrigin / ProfileICPThreshold) + break; + // TODO: Fix CallAnalyzer to handle all indirect calls. + // For indirect call, we don't run CallAnalyzer to get InlineCost + // before actual inlining. This is because we could see two different + // types from the same definition, which makes CallAnalyzer choke as + // it's expecting matching parameter type on both caller and callee + // side. See example from PR18962 for the triggering cases (the bug was + // fixed, but we generate different types). + if (!PSI->isHotCount(EntryCountDistributed)) + break; + const char *Reason = nullptr; + auto CalleeFunctionName = FS->getFuncName(); + if (CallBase *DI = tryPromoteIndirectCall( + F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) { + // Attach function profile for promoted indirect callee, and update + // call site count for the promoted inline candidate too. + Candidate = {DI, FS, EntryCountDistributed}; + PromotedInsns.insert(I); + SmallVector InlinedCallSites; + // If profile mismatches, we should not attempt to inline DI. 
+ if ((isa(DI) || isa(DI)) && + tryInlineCandidate(Candidate, InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); + } + Changed = true; + } + } else { + LLVM_DEBUG(dbgs() + << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason << "\n"); + } + } + } else if (CalledFunction && CalledFunction->getSubprogram() && + !CalledFunction->isDeclaration()) { + SmallVector InlinedCallSites; + if (tryInlineCandidate(Candidate, InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); + } + Changed = true; + } + } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { + findCalleeFunctionSamples(*I)->findInlinedFunctions( + InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); + } + } + + if (!CQueue.empty()) { + if (SizeLimit == (unsigned)ProfileInlineLimitMax) + ++NumCSInlinedHitMaxLimit; + else if (SizeLimit == (unsigned)ProfileInlineLimitMin) + ++NumCSInlinedHitMinLimit; + else + ++NumCSInlinedHitGrowthLimit; + } + + return Changed; +} + /// Find equivalence classes for the given block. /// /// This finds all the blocks that are guaranteed to execute the same @@ -1833,7 +2195,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { } DenseSet InlinedGUIDs; - Changed |= inlineHotFunctions(F, InlinedGUIDs); + if (ProfileIsCS && CallsitePrioritizedInline) + Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs); + else + Changed |= inlineHotFunctions(F, InlinedGUIDs); // Compute basic block weights. Changed |= computeBlockWeights(F); @@ -1978,6 +2343,12 @@ bool SampleProfileLoader::doInitialization(Module &M, ProfileIsCS = true; FunctionSamples::ProfileIsCS = true; + // Enable priority-base inliner and size inline by default for CSSPGO. 
+ if (!ProfileSizeInline.getNumOccurrences()) + ProfileSizeInline = true; + if (!CallsitePrioritizedInline.getNumOccurrences()) + CallsitePrioritizedInline = true; + // Tracker for profiles under different context ContextTracker = std::make_unique(Reader->getProfiles()); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof new file mode 100644 index 000000000000..095c7a1fc480 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof @@ -0,0 +1,10 @@ +[test]:63067:0 + 1: 3345 _Z3barv:1398 _Z3foov:2059 + 2: 100 _Z3bazv:102 + 3: 100 _Z3zoov:102 +[test:1 @ _Z3barv]:200:100 + 1: 100 +[test:1 @ _Z3foov]:4220:1200 + 14: 4220 +[test:2 @ _Z3bazv]:200:100 + 5: 100 \ No newline at end of file diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll new file mode 100644 index 000000000000..e5f2f7571eaf --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll @@ -0,0 +1,166 @@ +; REQUIRES: asserts +; Test that the new FDO inliner using a priority queue will not visit the same call site again and again. +; Use debug prints as repeated call site evaluation is not visible from final inline decision.
+ +; Note that we need the new pass manager to enable top-down processing for sample profile loader +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=OLD-INLINE +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=1 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=NEW-INLINE + +; Old inliner will evaluate the same call site three times +; OLD-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; OLD-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; OLD-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; OLD-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; OLD-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; OLD-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi + +; The new inliner evaluates the same call site only once +; NEW-INLINE: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; NEW-INLINE-NEXT: Callee context found: main:3.1 @ _Z5funcBi +; NEW-INLINE-NOT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi +; NEW-INLINE-NOT: Callee context found: main:3.1 @ _Z5funcBi + +@factor = dso_local global i32 3, align 4, !dbg !0 + +define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { +entry: + br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret i32 %add3, !dbg !27 + +for.body: ; preds = %for.body, %entry + %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ] + %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32 + %add = add nuw nsw i32 %x.011, 1, !dbg !31 + %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28 +
%add2 = add i32 %call, %r.010, !dbg !34 + %add3 = add i32 %add2, %call1, !dbg !35 + %dec = add nsw i32 %x.011, -1, !dbg !36 + %cmp = icmp eq i32 %x.011, 0, !dbg !38 + br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45 + ret i32 %call, !dbg !46 +} + +define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 { +entry: + %cmp = icmp sgt i32 %x, 0, !dbg !57 + br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59 + +while.cond2.preheader: ; preds = %entry + %cmp313 = icmp slt i32 %x, 0, !dbg !60 + br i1 %cmp313, label %while.body4, label %if.end, !dbg !63 + +while.body: ; preds = %while.body, %entry + %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ] + %tmp = load volatile i32, i32* @factor, align 4, !dbg !64 + %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67 + %sub = sub nsw i32 %x.addr.016, %call, !dbg !68 + %cmp1 = icmp sgt i32 %sub, 0, !dbg !69 + br i1 %cmp1, label %while.body, label %if.end, !dbg !71 + +while.body4: ; preds = %while.body4, %while.cond2.preheader + %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ] + %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72 + %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74 + %add = add nsw i32 %call5, %x.addr.114, !dbg !75 + %cmp3 = icmp slt i32 %add, 0, !dbg !60 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !63 + +if.end: ; preds = %while.body4, %while.body, %while.cond2.preheader + %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ] + ret i32 %x.addr.2, !dbg !76 +} + +define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 { +entry: + %sub = add nsw i32 %x, -100000, !dbg !51 + %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52 + ret i32 %call, !dbg !53 +} + +declare i32 
@_Z3fibi(i32) + +attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!14, !15, !16} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo") +!4 = !{} +!5 = !{!6, !10, !11} +!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !9} +!9 = 
!DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!12 = !{!0} +!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, !"wchar_size", i32 4} +!17 = !{!"clang version 11.0.0"} +!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!9} +!21 = !{!22, !23} +!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9) +!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9) +!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3) +!25 = !DILocation(line: 13, column: 3, scope: !26) +!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2) +!27 = !DILocation(line: 17, column: 3, scope: !18) +!28 = !DILocation(line: 14, column: 10, scope: !29) +!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37) +!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3) +!31 = !DILocation(line: 14, column: 29, scope: !29) +!32 = !DILocation(line: 14, column: 21, scope: !33) +!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2) +!34 = !DILocation(line: 14, column: 19, scope: !29) +!35 = !DILocation(line: 14, column: 7, scope: !29) +!36 = !DILocation(line: 13, column: 33, scope: !37) +!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6) +!38 = !DILocation(line: 13, column: 26, 
scope: !39) +!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2) +!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!44 = !DILocation(line: 27, column: 22, scope: !40) +!45 = !DILocation(line: 27, column: 11, scope: !40) +!46 = !DILocation(line: 29, column: 3, scope: !40) +!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!51 = !DILocation(line: 33, column: 22, scope: !47) +!52 = !DILocation(line: 33, column: 11, scope: !47) +!53 = !DILocation(line: 35, column: 3, scope: !47) +!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!57 = !DILocation(line: 49, column: 9, scope: !58) +!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7) +!59 = !DILocation(line: 49, column: 7, scope: !54) +!60 = !DILocation(line: 58, column: 14, scope: !61) +!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2) +!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8) +!63 = !DILocation(line: 58, column: 5, scope: !61) +!64 = !DILocation(line: 52, column: 16, scope: !65) +!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19) +!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14) +!67 = !DILocation(line: 52, column: 12, scope: !65) +!68 = !DILocation(line: 52, column: 9, scope: !65) +!69 = !DILocation(line: 51, column: 14, scope: !70) +!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2) +!71 = !DILocation(line: 51, 
column: 5, scope: !70) +!72 = !DILocation(line: 59, column: 16, scope: !73) +!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19) +!74 = !DILocation(line: 59, column: 12, scope: !73) +!75 = !DILocation(line: 59, column: 9, scope: !73) +!76 = !DILocation(line: 63, column: 3, scope: !54) diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll new file mode 100644 index 000000000000..3ec64326da2d --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll @@ -0,0 +1,63 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s + +define void @test(void ()*) #0 !dbg !3 { +;; Add two direct call to force top-down order for sample profile loader + call void @_Z3foov(), !dbg !7 + call void @_Z3barv(), !dbg !7 + call void @_Z3bazv(), !dbg !7 + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 + call void %3(), !dbg !4 + %4 = alloca void ()* + store void ()* %0, void ()** %4 + %5 = load void ()*, void ()** %4 + call void %5(), !dbg !5 + ret void +} + +define void @_Z3foov() #0 
!dbg !8 { + ret void +} + +define void @_Z3barv() #0 !dbg !9 { + ret void +} + +define void @_Z3bazv() #0 !dbg !10 { + ret void +} + +define void @_Z3zoov() #0 !dbg !11 { + ret void +} + +attributes #0 = {"use-sample-profile"} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "test.cc", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, unit: !0) +!4 = !DILocation(line: 4, scope: !3) +!5 = !DILocation(line: 5, scope: !3) +!6 = !DILocation(line: 6, scope: !3) +!7 = !DILocation(line: 7, scope: !3) +!8 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 29, unit: !0) +!9 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 32, unit: !0) +!10 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 24, unit: !0) +!11 = distinct !DISubprogram(name: "zoo", linkageName: "_Z3zoov", scope: !1, file: !1, line: 24, unit: !0) + + +; ICP-ALL: remark: test.cc:5:0: _Z3bazv inlined into test +; ICP-ALL-NEXT: remark: test.cc:4:0: _Z3foov inlined into test +; ICP-ALL-NEXT: remark: test.cc:4:0: _Z3barv inlined into test +; ICP-ALL-NOT: remark + +; ICP-HOT: remark: test.cc:4:0: _Z3foov inlined into test +; ICP-HOT-NOT: remark diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll new file mode 100644 index 000000000000..14e916d8c2e8 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll @@ -0,0 +1,180 @@ +; Test for CSSPGO's new early inliner using priority queue + +; Note that we need new pass manager to enable top-down processing for sample profile loader +; Test we inlined the following in top-down order with old inliner +; main:3 @ _Z5funcAi +; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi +; _Z5funcBi:1 @ _Z8funcLeafi +; RUN: 
opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; +; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW +; +; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; +; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning cold sample profile inline threshold can get us the same inlining +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE +; +; With new FDO early inliner and tuned cutoff, we can control inlining through size growth tuning knob. 
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1 +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2 + + +; INLINE-BASE: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10 +; INLINE-BASE: remark: merged.cpp:27:11: _Z8funcLeafi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10 +; INLINE-BASE: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 + +; INLINE-NEW: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10 +; INLINE-NEW-NOT: remark + +; INLINE-NEW-LIMIT1-NOT: remark + +; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: _Z8funcLeafi inlined into _Z5funcAi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 +; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11 +; INLINE-NEW-LIMIT2-NOT: remark + +@factor = dso_local global i32 3, align 4, !dbg !0 + +define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { +entry: 
+ br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret i32 %add3, !dbg !27 + +for.body: ; preds = %for.body, %entry + %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ] + %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32 + %add = add nuw nsw i32 %x.011, 1, !dbg !31 + %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28 + %add2 = add i32 %call, %r.010, !dbg !34 + %add3 = add i32 %add2, %call1, !dbg !35 + %dec = add nsw i32 %x.011, -1, !dbg !36 + %cmp = icmp eq i32 %x.011, 0, !dbg !38 + br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45 + ret i32 %call, !dbg !46 +} + +define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 { +entry: + %cmp = icmp sgt i32 %x, 0, !dbg !57 + br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59 + +while.cond2.preheader: ; preds = %entry + %cmp313 = icmp slt i32 %x, 0, !dbg !60 + br i1 %cmp313, label %while.body4, label %if.end, !dbg !63 + +while.body: ; preds = %while.body, %entry + %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ] + %tmp = load volatile i32, i32* @factor, align 4, !dbg !64 + %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67 + %sub = sub nsw i32 %x.addr.016, %call, !dbg !68 + %cmp1 = icmp sgt i32 %sub, 0, !dbg !69 + br i1 %cmp1, label %while.body, label %if.end, !dbg !71 + +while.body4: ; preds = %while.body4, %while.cond2.preheader + %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ] + %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72 + %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74 + %add = add nsw i32 %call5, %x.addr.114, !dbg !75 + %cmp3 = icmp slt i32 %add, 0, !dbg !60 + br i1 %cmp3, label %while.body4, label %if.end, !dbg !63 + +if.end: ; preds = 
%while.body4, %while.body, %while.cond2.preheader + %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ] + ret i32 %x.addr.2, !dbg !76 +} + +define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 { +entry: + %sub = add nsw i32 %x, -100000, !dbg !51 + %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52 + ret i32 %call, !dbg !53 +} + +declare i32 @_Z3fibi(i32) + +attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } +attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!14, !15, !16} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, 
splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo") +!4 = !{} +!5 = !{!6, !10, !11} +!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !9} +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4) +!12 = !{!0} +!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, !"wchar_size", i32 4} +!17 = !{!"clang version 11.0.0"} +!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!9} +!21 = !{!22, !23} +!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9) +!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9) +!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3) +!25 = !DILocation(line: 13, column: 3, scope: !26) +!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2) +!27 = !DILocation(line: 17, column: 3, scope: !18) +!28 = !DILocation(line: 14, column: 10, scope: !29) +!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37) +!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3) +!31 = !DILocation(line: 14, column: 
29, scope: !29) +!32 = !DILocation(line: 14, column: 21, scope: !33) +!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2) +!34 = !DILocation(line: 14, column: 19, scope: !29) +!35 = !DILocation(line: 14, column: 7, scope: !29) +!36 = !DILocation(line: 13, column: 33, scope: !37) +!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6) +!38 = !DILocation(line: 13, column: 26, scope: !39) +!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2) +!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!44 = !DILocation(line: 27, column: 22, scope: !40) +!45 = !DILocation(line: 27, column: 11, scope: !40) +!46 = !DILocation(line: 29, column: 3, scope: !40) +!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!51 = !DILocation(line: 33, column: 22, scope: !47) +!52 = !DILocation(line: 33, column: 11, scope: !47) +!53 = !DILocation(line: 35, column: 3, scope: !47) +!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!57 = !DILocation(line: 49, column: 9, scope: !58) +!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7) +!59 = !DILocation(line: 49, column: 7, scope: !54) +!60 = !DILocation(line: 58, column: 14, scope: !61) +!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2) +!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8) +!63 = !DILocation(line: 58, column: 5, scope: !61) +!64 = !DILocation(line: 52, column: 16, scope: !65) +!65 = 
distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19) +!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14) +!67 = !DILocation(line: 52, column: 12, scope: !65) +!68 = !DILocation(line: 52, column: 9, scope: !65) +!69 = !DILocation(line: 51, column: 14, scope: !70) +!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2) +!71 = !DILocation(line: 51, column: 5, scope: !70) +!72 = !DILocation(line: 59, column: 16, scope: !73) +!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19) +!74 = !DILocation(line: 59, column: 12, scope: !73) +!75 = !DILocation(line: 59, column: 9, scope: !73) +!76 = !DILocation(line: 63, column: 3, scope: !54) diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll index 1a7a53457a5d..7789e18b394a 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll @@ -3,11 +3,11 @@ ; based on inline decision, so post inline counts are accurate. 
; Note that we need new pass manager to enable top-down processing for sample profile loader -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-prioritized-inline=0 -sample-profile-inline-size=0 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT -; Testwe we inlined the following in top-down order and promot rest not inlined context profile into base profile +; Test we inlined the following in top-down order and promot rest not inlined context profile into base profile ; main:3 @ _Z5funcAi ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi @@ -20,13 +20,9 @@ ; INLINE-ALL-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi ; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi ; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi -; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi( -; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi ; INLINE-ALL-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z8funcLeafi ; INLINE-ALL-NEXT: Callee context found: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi 
-; INLINE-ALL-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi -; INLINE-ALL-NEXT: Callee context found: main:3.1 @ _Z5funcBi ; INLINE-ALL-NEXT: Getting callee context for instr: %call.i1 = tail call i32 @_Z3fibi ; INLINE-ALL-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi ; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcAi @@ -48,24 +44,23 @@ ; INLINE-ALL-NEXT: Getting base profile for function: _Z8funcLeafi ; INLINE-ALL-NEXT: Merging context profile into base profile: _Z8funcLeafi -; Testwe we inlined the following in top-down order and promot rest not inlined context profile into base profile -; main:3 @ _Z5funcAi +; Test we inlined the following in top-down order and promot rest not inlined context profile into base profile ; _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-HOT: Getting base profile for function: main ; INLINE-HOT-NEXT: Merging context profile into base profile: main ; INLINE-HOT-NEXT: Found context tree root to promote: external:12 @ main ; INLINE-HOT-NEXT: Context promoted and merged to: main -; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !58 +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z5funcBi ; INLINE-HOT-NEXT: Callee context found: main:3.1 @ _Z5funcBi -; INLINE-HOT-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !63 +; INLINE-HOT-NEXT: Getting callee context for instr: %call1 = tail call i32 @_Z5funcAi ; INLINE-HOT-NEXT: Callee context found: main:3 @ _Z5funcAi ; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcAi ; INLINE-HOT-NEXT: Merging context profile into base profile: _Z5funcAi ; INLINE-HOT-NEXT: Found context tree root to promote: main:3 @ _Z5funcAi ; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi ; INLINE-HOT-NEXT: Context promoted to: _Z5funcAi:1 @ _Z8funcLeafi -; INLINE-HOT-NEXT: Getting callee context for 
instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50 +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50 ; INLINE-HOT-NEXT: Callee context found: _Z5funcAi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcAi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62 @@ -79,11 +74,11 @@ ; INLINE-HOT-NEXT: Context promoted to: _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Found context tree root to promote: externalA:17 @ _Z5funcBi ; INLINE-HOT-NEXT: Context promoted and merged to: _Z5funcBi -; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !50 +; INLINE-HOT-NEXT: Getting callee context for instr: %call = tail call i32 @_Z8funcLeafi ; INLINE-HOT-NEXT: Callee context found: _Z5funcBi:1 @ _Z8funcLeafi ; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi -; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62 -; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69 +; INLINE-HOT-NEXT: Getting callee context for instr: %call.i = tail call i32 @_Z3fibi +; INLINE-HOT-NEXT: Getting callee context for instr: %call5.i = tail call i32 @_Z3fibi ; INLINE-HOT-NEXT: Getting base profile for function: _Z8funcLeafi ; INLINE-HOT-NEXT: Merging context profile into base profile: _Z8funcLeafi diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll index adda7022047d..8d4e23829941 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll @@ -4,19 +4,18 @@ ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t ; Note that we 
need new pass manager to enable top-down processing for sample profile loader -; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile +; Test we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; main:3 @ _Z5funcAi ; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL - -; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile -; main:3 @ _Z5funcAi +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL +; +; Test we inlined the following in top-down order and entry counts accurate reflects post-inline base profile ; _Z5funcAi:1 @ _Z8funcLeafi ; _Z5funcBi:1 @ _Z8funcLeafi -; RUN: opt < %s -passes=sample-profile 
-sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT @factor = dso_local global i32 3, align 4, !dbg !0 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index a5033a0dc190..d47359fa0b5f 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -1,8 +1,8 @@ -; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s ; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-inline.prof -o %t2 -; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s +; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s ; RUN: FileCheck %s -check-prefix=YAML < %t2.opt.yaml target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" From c2f3f45b5c5bd6f9b86a766fc40130b34acb8293 Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Tue, 19 Jan 2021 23:29:14 -0800 Subject: [PATCH 056/244] [CSSPGO] Factor out common part for CSSPGO inline and AFDO inline Refactoring SampleProfileLoader::inlineHotFunctions to use helpers from 
CSSPGO inlining and reduce similar code in the inlining loop, plus minor cleanup for AFDO path. This is resubmit of D95024, with build break and overtighten assertion fixed. Test Plan: (cherry picked from commit 1645f465be85223e9f5b6303a3e5e0e491fd819f) --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 205 +++++++----------- .../SampleProfile/pseudo-probe-inline.ll | 2 +- llvm/test/Transforms/SampleProfile/remarks.ll | 4 +- 3 files changed, 80 insertions(+), 131 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 665c4078f3ee..2cfefd3a18ea 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -416,20 +416,18 @@ class SampleProfileLoader { findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName, - uint64_t &Sum, uint64_t Count, CallBase *I, - const char *&Reason); - bool inlineCallInstruction(CallBase &CB, - const FunctionSamples *CalleeSamples); + // Attempt to promote indirect call and also inline the promoted call + bool tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t &Sum, + DenseSet &PromotedInsns, + SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); - // Helper functions call-site prioritized BFS inliner - // Will change the main FDO inliner to be work list based directly in - // upstream, then merge this change with that and remove the duplication. 
InlineCost shouldInlineCandidate(InlineCandidate &Candidate); bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB); - bool tryInlineCandidate(InlineCandidate &Candidate, - SmallVector &InlinedCallSites); + bool + tryInlineCandidate(InlineCandidate &Candidate, + SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctionsWithPriority(Function &F, DenseSet &InlinedGUIDs); @@ -1077,70 +1075,46 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -CallBase * -SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName, - uint64_t &Sum, uint64_t Count, - CallBase *I, const char *&Reason) { - Reason = "Callee function not available"; +/// Attempt to promote indirect call and also inline the promoted call. +/// +/// \param F Caller function. +/// \param Candidate ICP and inline candidate. +/// \param Sum Sum of target counts for indirect call. +/// \param PromotedInsns Map to keep track of indirect call already processed. +/// \param Candidate ICP and inline candidate. +/// \param InlinedCallSite Output vector for new call sites exposed after +/// inlining. +bool SampleProfileLoader::tryPromoteAndInlineCandidate( + Function &F, InlineCandidate &Candidate, uint64_t &Sum, + DenseSet &PromotedInsns, + SmallVector *InlinedCallSite) { + const char *Reason = "Callee function not available"; // R->getValue() != &F is to prevent promoting a recursive call. // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. // clone the caller first, and inline the cloned caller if it is // recursive. As llvm does not inline recursive calls, we will // simply ignore it instead of handling it explicitly. 
- auto R = SymbolMap.find(CalleeName); + auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName()); if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && R->getValue()->hasFnAttribute("use-sample-profile") && - R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) { + R->getValue() != &F && + isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) { auto *DI = - &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE); - Sum -= Count; - return DI; - } - return nullptr; -} - -bool SampleProfileLoader::inlineCallInstruction( - CallBase &CB, const FunctionSamples *CalleeSamples) { - if (ExternalInlineAdvisor) { - auto Advice = ExternalInlineAdvisor->getAdvice(CB); - if (!Advice->isInliningRecommended()) { - Advice->recordUnattemptedInlining(); - return false; + &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(), + Candidate.CallsiteCount, Sum, false, ORE); + if (DI) { + Sum -= Candidate.CallsiteCount; + PromotedInsns.insert(Candidate.CallInstr); + Candidate.CallInstr = DI; + if (isa(DI) || isa(DI)) + return tryInlineCandidate(Candidate, InlinedCallSite); } - // Dummy record, we don't use it for replay. - Advice->recordInlining(); - } - - Function *CalledFunction = CB.getCalledFunction(); - assert(CalledFunction); - DebugLoc DLoc = CB.getDebugLoc(); - BasicBlock *BB = CB.getParent(); - InlineParams Params = getInlineParams(); - Params.ComputeFullInlineCost = true; - // Checks if there is anything in the reachable portion of the callee at - // this callsite that makes this inlining potentially illegal. Need to - // set ComputeFullInlineCost, otherwise getInlineCost may return early - // when cost exceeds threshold without checking all IRs in the callee. - // The acutal cost does not matter because we only checks isNever() to - // see if it is legal to inline the callsite. 
- InlineCost Cost = - getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI); - if (Cost.isNever()) { - ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) - << "incompatible inlining"); - return false; - } - InlineFunctionInfo IFI(nullptr, GetAC); - if (InlineFunction(CB, IFI).isSuccess()) { - // The call to InlineFunction erases I, so we can't pass it here. - emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost, - true, CSINLINE_DEBUG); - if (ProfileIsCS) - ContextTracker->markContextSamplesInlined(CalleeSamples); - ++NumCSInlined; - return true; + } else { + LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " + << Candidate.CalleeSamples->getFuncName() << " because " + << Reason << "\n"); } return false; } @@ -1206,10 +1180,11 @@ bool SampleProfileLoader::inlineHotFunctions( "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - DenseMap localNotInlinedCallSites; + DenseMap LocalNotInlinedCallSites; bool Changed = false; - while (true) { - bool LocalChanged = false; + bool LocalChanged = true; + while (LocalChanged) { + LocalChanged = false; SmallVector CIS; for (auto &BB : F) { bool Hot = false; @@ -1223,7 +1198,7 @@ bool SampleProfileLoader::inlineHotFunctions( "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); if (FS->getEntrySamples() > 0 || ProfileIsCS) - localNotInlinedCallSites.try_emplace(CB, FS); + LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI)) Hot = true; else if (shouldInlineColdCallee(*CB)) @@ -1241,6 +1216,11 @@ bool SampleProfileLoader::inlineHotFunctions( } for (CallBase *I : CIS) { Function *CalledFunction = I->getCalledFunction(); + InlineCandidate Candidate = {I, + LocalNotInlinedCallSites.count(I) + ? LocalNotInlinedCallSites[I] + : nullptr, + 0 /* dummy count */}; // Do not inline recursive calls. 
if (CalledFunction == &F) continue; @@ -1257,30 +1237,16 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI)) continue; - const char *Reason = nullptr; - auto CalleeFunctionName = FS->getFuncName(); - if (CallBase *DI = - tryPromoteIndirectCall(F, CalleeFunctionName, Sum, - FS->getEntrySamples(), I, Reason)) { - PromotedInsns.insert(I); - // If profile mismatches, we should not attempt to inline DI. - if ((isa(DI) || isa(DI)) && - inlineCallInstruction(cast(*DI), FS)) { - localNotInlinedCallSites.erase(I); - LocalChanged = true; - } - } else { - LLVM_DEBUG(dbgs() - << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); + Candidate = {I, FS, FS->getEntrySamples()}; + if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) { + LocalNotInlinedCallSites.erase(I); + LocalChanged = true; } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I) - ? 
localNotInlinedCallSites[I] - : nullptr)) { - localNotInlinedCallSites.erase(I); + if (tryInlineCandidate(Candidate)) { + LocalNotInlinedCallSites.erase(I); LocalChanged = true; } } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1288,11 +1254,7 @@ bool SampleProfileLoader::inlineHotFunctions( InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); } } - if (LocalChanged) { - Changed = true; - } else { - break; - } + Changed |= LocalChanged; } // For CS profile, profile for not inlined context will be merged when @@ -1301,7 +1263,7 @@ bool SampleProfileLoader::inlineHotFunctions( return Changed; // Accumulate not inlined callsite information into notInlinedSamples - for (const auto &Pair : localNotInlinedCallSites) { + for (const auto &Pair : LocalNotInlinedCallSites) { CallBase *I = Pair.getFirst(); Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) @@ -1347,7 +1309,7 @@ bool SampleProfileLoader::inlineHotFunctions( } bool SampleProfileLoader::tryInlineCandidate( - InlineCandidate &Candidate, SmallVector &InlinedCallSites) { + InlineCandidate &Candidate, SmallVector *InlinedCallSites) { CallBase &CB = *Candidate.CallInstr; Function *CalledFunction = CB.getCalledFunction(); @@ -1372,9 +1334,11 @@ bool SampleProfileLoader::tryInlineCandidate( true, CSINLINE_DEBUG); // Now populate the list of newly exposed call sites. 
- InlinedCallSites.clear(); - for (auto &I : IFI.InlinedCallSites) - InlinedCallSites.push_back(I); + if (InlinedCallSites) { + InlinedCallSites->clear(); + for (auto &I : IFI.InlinedCallSites) + InlinedCallSites->push_back(I); + } if (ProfileIsCS) ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); @@ -1409,8 +1373,6 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, InlineCost SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { - assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now"); - std::unique_ptr Advice = nullptr; if (ExternalInlineAdvisor) { Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr); @@ -1446,18 +1408,16 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params, GetTTI(*Callee), GetAC, GetTLI); + // Honor always inline and never inline from call analyzer + if (Cost.isNever() || Cost.isAlways()) + return Cost; + // For old FDO inliner, we inline the call site as long as cost is not // "Never". The cost-benefit check is done earlier. if (!CallsitePrioritizedInline) { - if (Cost.isNever()) - return Cost; - return InlineCost::getAlways("hot callsite previously inlined"); + return InlineCost::get(Cost.getCost(), INT_MAX); } - // Honor always inline and never inline from call analyzer - if (Cost.isNever() || Cost.isAlways()) - return Cost; - // Otherwise only use the cost from call analyzer, but overwite threshold with // Sample PGO threshold. return InlineCost::get(Cost.getCost(), SampleThreshold); @@ -1542,34 +1502,23 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( // fixed, but we generate different types). 
if (!PSI->isHotCount(EntryCountDistributed)) break; - const char *Reason = nullptr; - auto CalleeFunctionName = FS->getFuncName(); - if (CallBase *DI = tryPromoteIndirectCall( - F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) { - // Attach function profile for promoted indirect callee, and update - // call site count for the promoted inline candidate too. - Candidate = {DI, FS, EntryCountDistributed}; - PromotedInsns.insert(I); - SmallVector InlinedCallSites; - // If profile mismatches, we should not attempt to inline DI. - if ((isa(DI) || isa(DI)) && - tryInlineCandidate(Candidate, InlinedCallSites)) { - for (auto *CB : InlinedCallSites) { - if (getInlineCandidate(&NewCandidate, CB)) - CQueue.emplace(NewCandidate); - } - Changed = true; + SmallVector InlinedCallSites; + // Attach function profile for promoted indirect callee, and update + // call site count for the promoted inline candidate too. + Candidate = {I, FS, EntryCountDistributed}; + if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns, + &InlinedCallSites)) { + for (auto *CB : InlinedCallSites) { + if (getInlineCandidate(&NewCandidate, CB)) + CQueue.emplace(NewCandidate); } - } else { - LLVM_DEBUG(dbgs() - << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); + Changed = true; } } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { SmallVector InlinedCallSites; - if (tryInlineCandidate(Candidate, InlinedCallSites)) { + if (tryInlineCandidate(Candidate, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index d47359fa0b5f..5359fd4da067 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -89,7 
+89,7 @@ if.end: ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '15' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - Threshold: '2147483647' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: foo diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll index 3add1e74abaa..46f016433b20 100644 --- a/llvm/test/Transforms/SampleProfile/remarks.ll +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -21,7 +21,7 @@ ; We are expecting foo() to be inlined in main() (almost all the cycles are ; spent inside foo). -; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21; +; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21; ; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21; ; The back edge for the loop is the hottest edge in the loop subgraph. @@ -47,7 +47,7 @@ ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '130' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '225' +;YAML-NEXT: - Threshold: '2147483647' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: main From a9157c5628dc89b13936bbc8eef261cb02d63d40 Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Fri, 11 Dec 2020 12:18:31 -0800 Subject: [PATCH 057/244] [CSSPGO] Introducing distribution factor for pseudo probe. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sample re-annotation is required in LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. 
This is mainly caused by preLTO code duplication that is done by passes such as loop unrolling, jump threading, indirect call promotion, etc., where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we are introducing a concept of distribution factor for pseudo probes so that samples can be distributed for duplicated probes scaled by a factor. We hope that optimizations duplicating code well-maintain the branch frequency information (BFI) based on which probe distribution factors are calculated. Distribution factors are updated at the end of preLTO pipeline to reflect an estimated portion of the real execution count. This change also introduces a pseudo probe verifier that can be run after each IR pass to detect duplicated pseudo probes. A saturated distribution factor stands for 1.0. A pseudo probe will carry a factor with the value ranging from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated with each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit Dwarf discriminator. A 7-bit distribution factor is used instead. Changes are also needed to the sample profile inliner to deal with prorated callsite counts. Call sites duplicated by PreLTO passes, when later on inlined in LTO time, should have the callees' probes prorated based on the Prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, indirect call promotion results in multiple callsites. The original samples should be distributed across them. This is fixed by adjusting the callsites' distribution factors.
Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D93264 (cherry picked from commit 3d89b3cbec230633e8228787819b15116c1a1730) --- clang/test/CodeGen/pseudo-probe-emit.c | 8 +- llvm/include/llvm/IR/IntrinsicInst.h | 8 +- llvm/include/llvm/IR/Intrinsics.td | 2 +- llvm/include/llvm/IR/PseudoProbe.h | 27 ++- .../llvm/Passes/StandardInstrumentations.h | 2 + llvm/include/llvm/ProfileData/SampleProf.h | 10 ++ .../llvm/Transforms/IPO/SampleProfileProbe.h | 41 +++++ llvm/lib/IR/PseudoProbe.cpp | 41 +++++ llvm/lib/Passes/PassBuilder.cpp | 6 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Passes/StandardInstrumentations.cpp | 1 + llvm/lib/Transforms/IPO/SampleProfile.cpp | 108 +++++++++--- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 162 +++++++++++++++++- .../Inputs/pseudo-probe-update.prof | 8 + .../SampleProfile/pseudo-probe-emit-inline.ll | 20 +-- .../SampleProfile/pseudo-probe-emit.ll | 22 ++- .../SampleProfile/pseudo-probe-inline.ll | 38 ++-- .../SampleProfile/pseudo-probe-profile.ll | 42 ++++- .../SampleProfile/pseudo-probe-update.ll | 45 +++++ .../SampleProfile/pseudo-probe-verify.ll | 77 +++++++++ 20 files changed, 595 insertions(+), 74 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c index 059673b6992e..fccc8f04844d 100644 --- a/clang/test/CodeGen/pseudo-probe-emit.c +++ b/clang/test/CodeGen/pseudo-probe-emit.c @@ -6,12 +6,12 @@ void bar(); void go(); void foo(int x) { - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) if (x == 0) - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, 
i32 0, i64 -1) bar(); else - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1) go(); - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) } diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 9d68f3fdde6c..df3a1d568756 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -981,12 +981,16 @@ class PseudoProbeInst : public IntrinsicInst { return cast(const_cast(getArgOperand(0))); } + ConstantInt *getIndex() const { + return cast(const_cast(getArgOperand(1))); + } + ConstantInt *getAttributes() const { return cast(const_cast(getArgOperand(2))); } - ConstantInt *getIndex() const { - return cast(const_cast(getArgOperand(1))); + ConstantInt *getFactor() const { + return cast(const_cast(getArgOperand(3))); } }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index b2bfc6e6f9e6..21307ed1bd91 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1298,7 +1298,7 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int // Like the sideeffect intrinsic defined above, this intrinsic is treated by the // optimizer as having opaque side effects so that it won't be get rid of or moved // out of the block it probes. 
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], +def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; // Intrinsics to support half precision floating point format diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h index e0370c264102..5165e80caa2d 100644 --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -16,28 +16,39 @@ #include "llvm/ADT/Optional.h" #include #include +#include namespace llvm { class Instruction; +class BasicBlock; constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; +// The saturated distrution factor representing 100% for block probes. +constexpr static uint64_t PseudoProbeFullDistributionFactor = + std::numeric_limits::max(); + struct PseudoProbeDwarfDiscriminator { +public: // The following APIs encodes/decodes per-probe information to/from a // 32-bit integer which is organized as: // [2:0] - 0x7, this is reserved for regular discriminator, // see DWARF discriminator encoding rule // [18:3] - probe id - // [25:19] - reserved + // [25:19] - probe distribution factor // [28:26] - probe type, see PseudoProbeType // [31:29] - reserved for probe attributes - static uint32_t packProbeData(uint32_t Index, uint32_t Type) { + static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags, + uint32_t Factor) { assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16"); assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7"); - return (Index << 3) | (Type << 26) | 0x7; + assert(Flags <= 0x7); + assert(Factor <= 100 && + "Probe distribution factor too big to encode, exceeding 100"); + return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7; } static uint32_t extractProbeIndex(uint32_t Value) { @@ -51,16 +62,26 @@ struct 
PseudoProbeDwarfDiscriminator { static uint32_t extractProbeAttributes(uint32_t Value) { return (Value >> 29) & 0x7; } + + static uint32_t extractProbeFactor(uint32_t Value) { + return (Value >> 19) & 0x7F; + } + + // The saturated distrution factor representing 100% for callsites. + constexpr static uint8_t FullDistributionFactor = 100; }; struct PseudoProbe { uint32_t Id; uint32_t Type; uint32_t Attr; + float Factor; }; Optional extractProbe(const Instruction &Inst); +void setProbeDistributionFactor(Instruction &Inst, float Factor); + } // end namespace llvm #endif // LLVM_IR_PSEUDOPROBE_H diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 795a980878e2..61c86b0468f2 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -22,6 +22,7 @@ #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/SampleProfileProbe.h" #include #include @@ -273,6 +274,7 @@ class StandardInstrumentations { OptBisectInstrumentation OptBisect; PreservedCFGCheckerInstrumentation PreservedCFGChecker; IRChangedPrinter PrintChangedIR; + PseudoProbeVerifier PseudoProbeVerification; VerifyInstrumentation Verify; bool VerifyEach; diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 346bc4c81d86..25d5b2376c11 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -347,6 +347,16 @@ class SampleRecord { return SortedTargets; } + /// Prorate call targets by a distribution factor. 
+ static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, + float DistributionFactor) { + CallTargetMap AdjustedTargets; + for (const auto &I : Targets) { + AdjustedTargets[I.first()] = I.second * DistributionFactor; + } + return AdjustedTargets; + } + /// Merge the samples in \p Other into this record. /// Optionally scale sample counts by \p Weight. sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 78117fd4a9c2..cab893b50d19 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -16,6 +16,10 @@ #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" @@ -29,6 +33,8 @@ class Module; using namespace sampleprof; using BlockIdMap = std::unordered_map; using InstructionIdMap = std::unordered_map; +using ProbeFactorMap = std::unordered_map; +using FuncProbeFactorMap = StringMap; enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; @@ -43,6 +49,33 @@ class PseudoProbeDescriptor { uint64_t getFunctionHash() const { return FunctionHash; } }; +// A pseudo probe verifier that can be run after each IR passes to detect the +// violation of updating probe factors. In principle, the sum of distribution +// factor for a probe should be identical before and after a pass. For a +// function pass, the factor sum for a probe would be typically 100%. +class PseudoProbeVerifier { +public: + void registerCallbacks(PassInstrumentationCallbacks &PIC); + + // Implementation of pass instrumentation callbacks for new pass manager. 
+ void runAfterPass(StringRef PassID, Any IR); + +private: + // Allow a little bias due the rounding to integral factors. + constexpr static float DistributionFactorVariance = 0.02; + // Distribution factors from last pass. + FuncProbeFactorMap FunctionProbeFactors; + + void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors); + void runAfterPass(const Module *M); + void runAfterPass(const LazyCallGraph::SCC *C); + void runAfterPass(const Function *F); + void runAfterPass(const Loop *L); + bool shouldVerifyFunction(const Function *F); + void verifyProbeFactors(const Function *F, + const ProbeFactorMap &ProbeFactors); +}; + // This class serves sample counts correlation for SampleProfileLoader by // analyzing pseudo probes and their function descriptors injected by // SampleProfileProber. @@ -102,5 +135,13 @@ class SampleProfileProbePass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +class PseudoProbeUpdatePass : public PassInfoMixin { + void runOnFunction(Function &F, FunctionAnalysisManager &FAM); + +public: + PseudoProbeUpdatePass() {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp index 804214f06e7a..80d2963938d4 100644 --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -35,6 +35,9 @@ Optional extractProbeFromDiscriminator(const Instruction &Inst) { PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); Probe.Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator); + Probe.Factor = + PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) / + (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor; return Probe; } } @@ -47,6 +50,8 @@ Optional extractProbe(const Instruction &Inst) { Probe.Id = II->getIndex()->getZExtValue(); Probe.Type = (uint32_t)PseudoProbeType::Block; 
Probe.Attr = II->getAttributes()->getZExtValue(); + Probe.Factor = II->getFactor()->getZExtValue() / + (float)PseudoProbeFullDistributionFactor; return Probe; } @@ -55,4 +60,40 @@ Optional extractProbe(const Instruction &Inst) { return None; } + +void setProbeDistributionFactor(Instruction &Inst, float Factor) { + assert(Factor >= 0 && Factor <= 1 && + "Distribution factor must be in [0, 1.0]"); + if (auto *II = dyn_cast(&Inst)) { + IRBuilder<> Builder(&Inst); + uint64_t IntFactor = PseudoProbeFullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + auto OrigFactor = II->getFactor()->getZExtValue(); + if (IntFactor != OrigFactor) + II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor)); + } else if (isa(&Inst) && !isa(&Inst)) { + if (const DebugLoc &DLoc = Inst.getDebugLoc()) { + const DILocation *DIL = DLoc; + auto Discriminator = DIL->getDiscriminator(); + if (DILocation::isPseudoProbeDiscriminator(Discriminator)) { + auto Index = + PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); + auto Type = + PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); + auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes( + Discriminator); + // Round small factors to 0 to avoid over-counting. + uint32_t IntFactor = + PseudoProbeDwarfDiscriminator::FullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( + Index, Type, Attr, IntFactor); + DIL = DIL->cloneWithDiscriminator(V); + Inst.setDebugLoc(DIL); + } + } + } +} } // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d4c4c6e01ef5..6c1a7c75d30a 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1423,6 +1423,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Now add the optimization pipeline. 
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1477,6 +1480,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { if (PTO.Coroutines) MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 860bfade733d..877cb9ed13b3 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) +MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) #undef MODULE_PASS #ifndef CGSCC_ANALYSIS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index a8bfe02d4432..6795aed7b04e 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks( OptBisect.registerCallbacks(PIC); PreservedCFGChecker.registerCallbacks(PIC); PrintChangedIR.registerCallbacks(PIC); + PseudoProbeVerification.registerCallbacks(PIC); if (VerifyEach) Verify.registerCallbacks(PIC); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 2cfefd3a18ea..b2a9127773c3 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -108,6 +108,8 @@ STATISTIC(NumCSNotInlined, STATISTIC(NumMismatchedProfile, "Number of functions with CFG 
mismatched profile"); STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile"); +STATISTIC(NumDuplicatedInlinesite, + "Number of inlined callsites with a partial distribution factor"); STATISTIC(NumCSInlinedHitMinLimit, "Number of functions with FDO inline stopped due to min size limit"); @@ -358,7 +360,14 @@ class GUIDToFuncNameMapper { struct InlineCandidate { CallBase *CallInstr; const FunctionSamples *CalleeSamples; + // Prorated callsite count, which will be used to guide inlining. For example, + // if a callsite is duplicated in LTO prelink, then in LTO postlink the two + // copies will get their own distribution factors and their prorated counts + // will be used to decide if they should be inlined independently. uint64_t CallsiteCount; + // Call site distribution factor to prorate the profile samples for a + // duplicated callsite. Default value is 1.0. + float CallsiteDistribution; }; // Inline candidate comparer using call site weight @@ -418,8 +427,8 @@ class SampleProfileLoader { const FunctionSamples *findFunctionSamples(const Instruction &I) const; // Attempt to promote indirect call and also inline the promoted call bool tryPromoteAndInlineCandidate( - Function &F, InlineCandidate &Candidate, uint64_t &Sum, - DenseSet &PromotedInsns, + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, + uint64_t &Sum, DenseSet &PromotedInsns, SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); @@ -886,7 +895,7 @@ ErrorOr SampleProfileLoader::getProbeWeight(const Instruction &Inst) { const ErrorOr &R = FS->findSamplesAt(Probe->Id, 0); if (R) { - uint64_t Samples = R.get(); + uint64_t Samples = R.get() * Probe->Factor; bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples); if (FirstMark) { ORE->emit([&]() { @@ -894,13 +903,17 @@ ErrorOr SampleProfileLoader::getProbeWeight(const Instruction &Inst) { Remark << "Applied " << ore::NV("NumSamples", Samples); Remark << " 
samples from profile (ProbeId="; Remark << ore::NV("ProbeId", Probe->Id); + Remark << ", Factor="; + Remark << ore::NV("Factor", Probe->Factor); + Remark << ", OriginalSamples="; + Remark << ore::NV("OriginalSamples", R.get()); Remark << ")"; return Remark; }); } - LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst - << " - weight: " << R.get() << ")\n"); + << " - weight: " << R.get() << " - factor: " + << format("%0.2f", Probe->Factor) << ")\n"); return Samples; } return R; @@ -1085,7 +1098,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { /// \param InlinedCallSite Output vector for new call sites exposed after /// inlining. bool SampleProfileLoader::tryPromoteAndInlineCandidate( - Function &F, InlineCandidate &Candidate, uint64_t &Sum, + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, DenseSet &PromotedInsns, SmallVector *InlinedCallSite) { const char *Reason = "Callee function not available"; @@ -1106,10 +1119,28 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate( Candidate.CallsiteCount, Sum, false, ORE); if (DI) { Sum -= Candidate.CallsiteCount; + // Prorate the indirect callsite distribution. + // Do not update the promoted direct callsite distribution at this + // point since the original distribution combined with the callee + // profile will be used to prorate callsites from the callee if + // inlined. Once not inlined, the direct callsite distribution should + // be prorated so that it will reflect the real callsite counts. + setProbeDistributionFactor(*Candidate.CallInstr, + Candidate.CallsiteDistribution * Sum / + SumOrigin); PromotedInsns.insert(Candidate.CallInstr); Candidate.CallInstr = DI; - if (isa(DI) || isa(DI)) - return tryInlineCandidate(Candidate, InlinedCallSite); + if (isa(DI) || isa(DI)) { + bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite); + if (!Inlined) { + // Prorate the direct callsite distribution so that it reflects real + // callsite counts.
+ setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution * + Candidate.CallsiteCount / + SumOrigin); + } + return Inlined; + } } } else { LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " @@ -1216,11 +1247,11 @@ bool SampleProfileLoader::inlineHotFunctions( } for (CallBase *I : CIS) { Function *CalledFunction = I->getCalledFunction(); - InlineCandidate Candidate = {I, - LocalNotInlinedCallSites.count(I) - ? LocalNotInlinedCallSites[I] - : nullptr, - 0 /* dummy count */}; + InlineCandidate Candidate = { + I, + LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I] + : nullptr, + 0 /* dummy count */, 1.0 /* dummy distribution factor */}; // Do not inline recursive calls. if (CalledFunction == &F) continue; @@ -1229,6 +1260,7 @@ bool SampleProfileLoader::inlineHotFunctions( continue; uint64_t Sum; for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { + uint64_t SumOrigin = Sum; if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); @@ -1237,8 +1269,9 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI)) continue; - Candidate = {I, FS, FS->getEntrySamples()}; - if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) { + Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; } @@ -1343,6 +1376,23 @@ bool SampleProfileLoader::tryInlineCandidate( if (ProfileIsCS) ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); ++NumCSInlined; + + // Prorate inlined probes for a duplicated inlining callsite which probably + // has a distribution less than 100%. Samples for an inlinee should be + // distributed among the copies of the original callsite based on each + // callsite's distribution factor for counts accuracy. 
Note that an inlined + // probe may come with its own distribution factor if it has been duplicated + // in the inlinee body. The two factors are multiplied to reflect the + // aggregation of duplication. + if (Candidate.CallsiteDistribution < 1) { + for (auto &I : IFI.InlinedCallSites) { + if (Optional Probe = extractProbe(*I)) + setProbeDistributionFactor(*I, Probe->Factor * + Candidate.CallsiteDistribution); + } + NumDuplicatedInlinesite++; + } + return true; } return false; @@ -1360,14 +1410,19 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, if (!CalleeSamples) return false; + float Factor = 1.0; + if (Optional Probe = extractProbe(*CB)) + Factor = Probe->Factor; + uint64_t CallsiteCount = 0; ErrorOr Weight = getBlockWeight(CB->getParent()); if (Weight) CallsiteCount = Weight.get(); if (CalleeSamples) - CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples()); + CallsiteCount = std::max( + CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); - *NewCandidate = {CB, CalleeSamples, CallsiteCount}; + *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1479,6 +1534,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( uint64_t Sum; auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); uint64_t SumOrigin = Sum; + Sum *= Candidate.CallsiteDistribution; for (const auto *FS : CalleeSamples) { // TODO: Consider disable pre-lTO ICP for MonoLTO as well if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1486,7 +1542,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( PSI->getOrCompHotCountThreshold()); continue; } - uint64_t EntryCountDistributed = FS->getEntrySamples(); + uint64_t EntryCountDistributed = + FS->getEntrySamples() * Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and
inline. That means we should @@ -1505,9 +1562,10 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( SmallVector InlinedCallSites; // Attach function profile for promoted indirect callee, and update // call site count for the promoted inline candidate too. - Candidate = {I, FS, EntryCountDistributed}; - if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns, - &InlinedCallSites)) { + Candidate = {I, FS, EntryCountDistributed, + Candidate.CallsiteDistribution}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); @@ -1965,6 +2023,14 @@ void SampleProfileLoader::propagateWeights(Function &F) { auto T = FS->findCallTargetMapAt(CallSite); if (!T || T.get().empty()) continue; + // Prorate the callsite counts to reflect what is already done to the + // callsite, such as ICP or callsite cloning. + if (FunctionSamples::ProfileIsProbeBased) { + if (Optional Probe = extractProbe(I)) { + if (Probe->Factor < 1) + T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); + } + } SmallVector SortedCallTargets = GetSortedValueDataFromCallTargets(T.get()); uint64_t Sum; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 7cecd20b78d8..a885c3ee4ded 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -12,6 +12,7 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -25,8 +26,10 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Instrumentation.h" #include
"llvm/Transforms/Utils/ModuleUtils.h" +#include #include using namespace llvm; @@ -35,6 +38,115 @@ using namespace llvm; STATISTIC(ArtificialDbgLine, "Number of probes that have an artificial debug line"); +static cl::opt + VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden, + cl::desc("Do pseudo probe verification")); + +static cl::list VerifyPseudoProbeFuncList( + "verify-pseudo-probe-funcs", cl::Hidden, + cl::desc("The option to specify the name of the functions to verify.")); + +static cl::opt + UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden, + cl::desc("Update pseudo probe distribution factor")); + +bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) { + // Skip function declaration. + if (F->isDeclaration()) + return false; + // Skip function that will not be emitted into object file. The prevailing + // defintion will be verified instead. + if (F->hasAvailableExternallyLinkage()) + return false; + // Do a name matching. + static std::unordered_set VerifyFuncNames( + VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end()); + return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str()); +} + +void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) { + if (VerifyPseudoProbe) { + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &) { + this->runAfterPass(P, IR); + }); + } +} + +// Callback to run after each transformation for the new pass manager. 
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) { + std::string Banner = + "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n"; + dbgs() << Banner; + if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else + llvm_unreachable("Unknown IR unit"); +} + +void PseudoProbeVerifier::runAfterPass(const Module *M) { + for (const Function &F : *M) + runAfterPass(&F); +} + +void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) { + for (const LazyCallGraph::Node &N : *C) + runAfterPass(&N.getFunction()); +} + +void PseudoProbeVerifier::runAfterPass(const Function *F) { + if (!shouldVerifyFunction(F)) + return; + ProbeFactorMap ProbeFactors; + for (const auto &BB : *F) + collectProbeFactors(&BB, ProbeFactors); + verifyProbeFactors(F, ProbeFactors); +} + +void PseudoProbeVerifier::runAfterPass(const Loop *L) { + const Function *F = L->getHeader()->getParent(); + runAfterPass(F); +} + +void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block, + ProbeFactorMap &ProbeFactors) { + for (const auto &I : *Block) { + if (Optional Probe = extractProbe(I)) + ProbeFactors[Probe->Id] += Probe->Factor; + } +} + +void PseudoProbeVerifier::verifyProbeFactors( + const Function *F, const ProbeFactorMap &ProbeFactors) { + bool BannerPrinted = false; + auto &PrevProbeFactors = FunctionProbeFactors[F->getName()]; + for (const auto &I : ProbeFactors) { + float CurProbeFactor = I.second; + if (PrevProbeFactors.count(I.first)) { + float PrevProbeFactor = PrevProbeFactors[I.first]; + if (std::abs(CurProbeFactor - PrevProbeFactor) > + DistributionFactorVariance) { + if (!BannerPrinted) { + dbgs() << "Function " << F->getName() << ":\n"; + BannerPrinted = true; + } + dbgs() << "Probe " << I.first << "\tprevious factor " + << format("%0.2f", PrevProbeFactor) << "\tcurrent factor " + << 
format("%0.2f", CurProbeFactor) << "\n"; + } + } + + // Update + PrevProbeFactors[I.first] = I.second; + } +} + PseudoProbeManager::PseudoProbeManager(const Module &M) { if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) { for (const auto *Operand : FuncInfo->operands()) { @@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { Function *ProbeFn = llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe); Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index), - Builder.getInt32(0)}; + Builder.getInt32(0), + Builder.getInt64(PseudoProbeFullDistributionFactor)}; auto *Probe = Builder.CreateCall(ProbeFn, Args); AssignDebugLoc(Probe); } @@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { // Levarge the 32-bit discriminator field of debug data to store the ID and // type of a callsite probe. This gets rid of the dependency on plumbing a // customized metadata through the codegen pipeline. - uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type); + uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( + Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor); if (auto DIL = Call->getDebugLoc()) { DIL = DIL->cloneWithDiscriminator(V); Call->setDebugLoc(DIL); @@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M, return PreservedAnalyses::none(); } + +void PseudoProbeUpdatePass::runOnFunction(Function &F, + FunctionAnalysisManager &FAM) { + BlockFrequencyInfo &BFI = FAM.getResult(F); + auto BBProfileCount = [&BFI](BasicBlock *BB) { + return BFI.getBlockProfileCount(BB) + ? BFI.getBlockProfileCount(BB).getValue() + : 0; + }; + + // Collect the sum of execution weight for each probe. 
+ ProbeFactorMap ProbeFactors; + for (auto &Block : F) { + for (auto &I : Block) { + if (Optional Probe = extractProbe(I)) + ProbeFactors[Probe->Id] += BBProfileCount(&Block); + } + } + + // Fix up over-counted probes. + for (auto &Block : F) { + for (auto &I : Block) { + if (Optional Probe = extractProbe(I)) { + float Sum = ProbeFactors[Probe->Id]; + if (Sum != 0) + setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum); + } + } + } +} + +PreservedAnalyses PseudoProbeUpdatePass::run(Module &M, + ModuleAnalysisManager &AM) { + if (UpdatePseudoProbe) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + runOnFunction(F, FAM); + } + } + return PreservedAnalyses::none(); +} diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof new file mode 100644 index 000000000000..62f9bd5992e7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof @@ -0,0 +1,8 @@ +foo:3200:13 + 1: 13 + 2: 7 + 3: 6 + 4: 13 + 5: 7 + 6: 6 + !CFGChecksum: 844530426352218 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll index 7e3c7e8deda2..4f730ba09a3a 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll @@ -11,14 +11,14 @@ ; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ define dso_local void @foo2() !dbg !7 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]] ; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 ret void, !dbg !10 } define dso_local void @foo() #0 !dbg !11 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]] -; CHECK-IL: 
call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]] ; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 call void @foo2(), !dbg !12 @@ -26,9 +26,9 @@ define dso_local void @foo() #0 !dbg !11 { } define dso_local i32 @entry() !dbg !14 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]] -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]] -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]] ; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 ; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 @@ -41,13 +41,13 @@ define dso_local i32 @entry() !dbg !14 { ; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo" ; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]]) ; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]]) -;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe -;; with an index of 2. -; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751) +;; A discriminator of 186646551 which is 0xb200017 in hexdecimal, stands for a direct call probe +;; with an index of 2 and a scale of 100%. 
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551) ; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry" ; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3, scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]]) ; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]]) -; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751) +; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551) ; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]]) ; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1]], inlinedAt: ![[#INL2]]) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll index 2074b708380f..da5d46a32287 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -11,32 +11,36 @@ ;; Check the generation of pseudoprobe intrinsic call. 
+@a = dso_local global i32 0, align 4 + define void @foo(i32 %x) !dbg !3 { bb0: %cmp = icmp eq i32 %x, 0 -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 br i1 %cmp, label %bb1, label %bb2 bb1: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 + store i32 6, i32* @a, align 4 br label %bb3 bb2: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 + store i32 8, i32* @a, align 4 br label %bb3 bb3: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]] ret void, !dbg !12 } @@ -44,7 +48,7 @@ declare void @bar(i32 %x) define internal void @foo2(void (i32)* %f) !dbg !4 { entry: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 ; Check pseudo_probe metadata attached to the indirect call instruction. 
@@ -64,13 +68,13 @@ entry: ; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]]) ; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]]) ; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]]) -;; A discriminator of 67108887 which is 0x4000017 in hexdecimal, stands for a direct call probe +;; A discriminator of 119537687 which is 0x7200017 in hexadecimal, stands for a direct call probe ;; with an index of 2. -; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887) +; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687) ; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) -;; A discriminator of 134217759 which is 0x800001f in hexdecimal, stands for a direct call probe +;; A discriminator of 186646559 which is 0xb20001f in hexadecimal, stands for a direct call probe ;; with an index of 3. -; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759) +; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559) ; Check the generation of .pseudo_probe_desc section ; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index 5359fd4da067..055d41792290 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -12,18 +12,18 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local i32 @foo(i32 %x) #0 !dbg !12 { entry: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1) %add = add nsw i32 %x, 100000, !dbg !19 ;; Check zen is fully inlined so there's no call to zen anymore.
;; Check code from the inlining of zen is properly annotated here. -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]] -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1) ; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]] -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1) ; CHECK-NOT: call i32 @zen %call = call i32 @zen(i32 %add), !dbg !20 ret i32 %call, !dbg !21 @@ -32,36 +32,36 @@ entry: ; CHECK: define dso_local i32 @zen define dso_local i32 @zen(i32 %x) #0 !dbg !22 { entry: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1) %cmp = icmp sgt i32 %x, 0, !dbg !26 br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28 while.cond: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1) %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ] %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29 br i1 %cmp1, label %while.body, label %if.end, !dbg !31 while.body: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) +; CHECK: call void 
@llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1) %0 = load volatile i32, i32* @factor, align 4, !dbg !32 %sub = sub nsw i32 %x.addr.0, %0, !dbg !39 br label %while.cond, !dbg !31 while.cond2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1) %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ] %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42 br i1 %cmp3, label %while.body4, label %if.end, !dbg !44 while.body4: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1) %1 = load volatile i32, i32* @factor, align 4, !dbg !45 %add = add nsw i32 %x.addr.1, %1, !dbg !48 br label %while.cond2, !dbg !44 if.end: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1) %x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ] ret i32 %x.addr.2, !dbg !51 } @@ -109,6 +109,10 @@ if.end: ;YAML-NEXT: - NumSamples: '23' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '23' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... ;YAML: --- !Analysis @@ -121,6 +125,10 @@ if.end: ;YAML-NEXT: - NumSamples: '23' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '23' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... 
;YAML: --- !Analysis @@ -133,6 +141,10 @@ if.end: ;YAML-NEXT: - NumSamples: '382920' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '382920' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll index 25fd04e9d710..34629a3743eb 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll @@ -8,26 +8,26 @@ entry: store i32 %x, i32* %x.addr, align 4 %0 = load i32, i32* %x.addr, align 4 %cmp = icmp eq i32 %0, 0 - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) br i1 %cmp, label %if.then, label %if.else ; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]] if.then: ; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]] call void %f(i32 1) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) store i32 1, i32* %retval, align 4 br label %return if.else: ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]] call void %f(i32 2) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) store i32 2, i32* %retval, align 4 br label %return return: - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) %1 = load i32, i32* %retval, align 4 ret i32 %1 } @@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"} ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7} ; CHECK: ![[#PROBE1]] = 
!DILocation(line: 0, scope: ![[#SCOPE1:]]) -;; A discriminator of 119537711 which is 0x400002f in hexdecimal, stands for an indirect call probe +;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe ;; with an index of 5. -; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911) +; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} -; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) -;; A discriminator of 119537719 which is 0x4000037 in hexdecimal, stands for an indirect call probe +;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe ;; with an index of 6. -; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919) +; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) +; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719) ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2} !llvm.module.flags = !{!9, !10} @@ -69,6 +69,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '13' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -80,6 +84,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '5' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - 
OriginalSamples: '7' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -91,6 +99,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '7' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -102,6 +114,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '6' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '6' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -113,6 +129,10 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '3' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '6' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -124,4 +144,8 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '4' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '13' ;YAML-NEXT: - String: ')' diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll new file mode 100644 index 000000000000..992afedd14f7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s 
-passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S | FileCheck %s + +declare i32 @f1() +declare i32 @f2() +declare void @f3() + + +;; This tests that the branch in 'merge' can be cloned up into T1. +define i32 @foo(i1 %cond, i1 %cond2) #0 { +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) + br i1 %cond, label %T1, label %F1 +T1: +; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]] + %v1 = call i32 @f1() +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) +;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7. +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080) + %cond3 = icmp eq i32 %v1, 412 + br label %Merge +F1: +; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]] + %v2 = call i32 @f2() +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) +;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7. 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968) + br label %Merge +Merge: + + %A = phi i1 [%cond3, %T1], [%cond2, %F1] + %B = phi i32 [%v1, %T1], [%v2, %F1] + br i1 %A, label %T2, label %F2 +T2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) + call void @f3() + ret i32 %B +F2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) + ret i32 %B +} + +; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7} +; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6} + +attributes #0 = {"use-sample-profile"} + diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll new file mode 100644 index 000000000000..fd57dd8bc526 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll @@ -0,0 +1,77 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY +; RUN: FileCheck %s < %t + +; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass *** +; VERIFY: Function foo: +; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00 +; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00 + +declare void @foo2() nounwind + +define void @foo(i32 %x) { +bb: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) + %tmp = alloca [5 x i32*], align 16 + br label %bb7.preheader + +bb3.loopexit: + %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ] + %tmp5.not = icmp eq i32 %spec.select.lcssa, 0 + br i1 %tmp5.not, label %bb24, label %bb7.preheader + +bb7.preheader: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) + %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ] + br label %bb10 + +bb10: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void 
@llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) + %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ] + %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ] + %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv + %tmp14 = load i32*, i32** %tmp13, align 8 + %tmp15.not = icmp ne i32* %tmp14, null + %tmp18 = sext i1 %tmp15.not to i32 + %spec.select = add nsw i32 %tmp1.14, %tmp18 + call void @foo2(), !dbg !12 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 5 + br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13 + +bb24: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) + ret void +} + +;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe +;; with an index of 6 and a scale of -1%. 
+; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]]) +; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{!7} +!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{!"clang version 3.9.0"} +!12 = !DILocation(line: 2, column: 20, scope: !4) +!13 = distinct !{!13, !14} +!14 = !{!"llvm.loop.unroll.full"} From ad2086658df181369a09ad69dac260a41dbab814 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 3 Feb 2021 20:57:59 -0500 Subject: [PATCH 058/244] [OpenMP][NVPTX] Take functions in `deviceRTLs` as `convergent` OpenMP device compiler (similar to other SPMD compilers) assumes that functions are convergent by default to avoid invalid transformations, such as the bug (https://bugs.llvm.org/show_bug.cgi?id=49021). 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95971 (cherry picked from commit 0f0ce3c12edefd25448e39c4d20718a10d3d42c1) --- clang/lib/Frontend/CompilerInvocation.cpp | 2 + .../OpenMP/target_attribute_convergent.cpp | 13 +++ .../libomptarget/test/offloading/bug49021.cpp | 85 +++++++++++++++++++ 3 files changed, 100 insertions(+) create mode 100644 clang/test/OpenMP/target_attribute_convergent.cpp create mode 100644 openmp/libomptarget/test/offloading/bug49021.cpp diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index d8be4ea14868..036388ebd355 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2470,6 +2470,8 @@ void CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, bool IsTargetSpecified = Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ); + Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice; + if (Opts.OpenMP || Opts.OpenMPSimd) { if (int Version = getLastArgIntValue( Args, OPT_fopenmp_version_EQ, diff --git a/clang/test/OpenMP/target_attribute_convergent.cpp b/clang/test/OpenMP/target_attribute_convergent.cpp new file mode 100644 index 000000000000..932214e987c8 --- /dev/null +++ b/clang/test/OpenMP/target_attribute_convergent.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s +// expected-no-diagnostics + +#pragma omp declare target + +void foo() {} + +#pragma omp end declare target + +// CHECK: Function Attrs: {{.*}}convergent{{.*}} +// CHECK: define hidden void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]] +// CHECK: attributes 
[[ATTRIBUTE_NUMBER]] = { {{.*}}convergent{{.*}} } diff --git a/openmp/libomptarget/test/offloading/bug49021.cpp b/openmp/libomptarget/test/offloading/bug49021.cpp new file mode 100644 index 000000000000..bcdbf68b10e0 --- /dev/null +++ b/openmp/libomptarget/test/offloading/bug49021.cpp @@ -0,0 +1,85 @@ +// RUN: %libomptarget-compilexx-aarch64-unknown-linux-gnu -O3 && %libomptarget-run-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compilexx-powerpc64-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compilexx-powerpc64le-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compilexx-x86_64-pc-linux-gnu -O3 && %libomptarget-run-x86_64-pc-linux-gnu +// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda -O3 && %libomptarget-run-nvptx64-nvidia-cuda + +#include + +template int test_map() { + std::cout << "map(complex<>)" << std::endl; + T a(0.2), a_check; +#pragma omp target map(from : a_check) + { a_check = a; } + + if (a_check != a) { + std::cout << " wrong results"; + return 1; + } + + return 0; +} + +template int test_reduction() { + std::cout << "flat parallelism" << std::endl; + T sum(0), sum_host(0); + const int size = 100; + T array[size]; + for (int i = 0; i < size; i++) { + array[i] = i; + sum_host += array[i]; + } + +#pragma omp target teams distribute parallel for map(to: array[:size]) \ + reduction(+ : sum) + for (int i = 0; i < size; i++) + sum += array[i]; + + if (sum != sum_host) + std::cout << " wrong results " << sum << " host " << sum_host << std::endl; + + std::cout << "hierarchical parallelism" << std::endl; + const int nblock(10), block_size(10); + T block_sum[nblock]; +#pragma omp target teams distribute map(to \ + : array[:size]) \ + map(from \ + : block_sum[:nblock]) + for (int ib = 0; ib < nblock; ib++) { + T partial_sum = 0; + const int istart = ib * block_size; + const int iend = (ib + 1) * block_size; +#pragma omp parallel for reduction(+ : partial_sum) + for (int i = 
istart; i < iend; i++) + partial_sum += array[i]; + block_sum[ib] = partial_sum; + } + + sum = 0; + for (int ib = 0; ib < nblock; ib++) { + sum += block_sum[ib]; + } + + if (sum != sum_host) { + std::cout << " wrong results " << sum << " host " << sum_host << std::endl; + return 1; + } + + return 0; +} + +template int test_complex() { + int ret = 0; + ret |= test_map(); + ret |= test_reduction(); + return ret; +} + +int main() { + int ret = 0; + std::cout << "Testing float" << std::endl; + ret |= test_complex(); + std::cout << "Testing double" << std::endl; + ret |= test_complex(); + return ret; +} From e8cdcaeae406527c9a76b3dc5c522391c81dfdfd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 1 Feb 2021 10:56:09 -0800 Subject: [PATCH 059/244] [X86] Accept 64-bit GPRs for vextractps when using a register that requires EVEX. This is consistent with the VEX version. It also fixes a sorting issue in the matching table that caused the EVEX version to be prioritized over VEX in intel syntax. Fixes issue [2] from PR48991. 
(cherry picked from commit c691fe14da93a7c9eff466231515d6d4d16124fa) --- llvm/lib/Target/X86/X86InstrAVX512.td | 4 ++-- llvm/test/MC/X86/intel-syntax-x86-64-avx.s | 4 ++++ llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0c2b278fdd7b..19012797ae9a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, EXTRACT_get_vextract256_imm, [HasAVX512]>; // vextractps - extract 32 bits from XMM -def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), +def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, + [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX, VEX_WIG, Sched<[WriteVecExtract]>; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), diff --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s index bb57cb287f38..c1f20d204a8c 100644 --- a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s +++ b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s @@ -167,3 +167,7 @@ // CHECK: vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910] // CHECK: encoding: [0xc4,0xa1,0x6d,0xf5,0x8c,0xf1,0x02,0x00,0x00,0xe0] vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910] + +// CHECK: vextractps ecx, xmm2, 1 +// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd1,0x01] + vextractps ecx, xmm2, 1 diff --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s index 29bde03c5860..31c43afe5017 100644 --- a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s +++ 
b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s @@ -1260,3 +1260,6 @@ // CHECK: encoding: [0x62,0xf1,0x7e,0x89,0xe6,0x11] vcvtdq2pd xmm2 {k1} {z}, qword ptr [rcx] +// CHECK: vextractps ecx, xmm17, 1 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0xc9,0x01] + vextractps rcx, xmm17, 1 From 7fad20eccc4f9fe5d03b2e381e26e8eb13a3e3be Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 4 Feb 2021 08:44:20 -0500 Subject: [PATCH 060/244] Revert "[OpenMP] Disabled profiling in `libomp` by default to unblock link errors" This reverts commit f5602e0bf31ab590da19fa357980a753dbfd666e. --- openmp/CMakeLists.txt | 6 ------ openmp/docs/design/Runtimes.rst | 5 +---- openmp/runtime/CMakeLists.txt | 6 +++--- openmp/runtime/src/CMakeLists.txt | 12 +----------- openmp/runtime/src/kmp_config.h.cmake | 4 ++-- openmp/runtime/src/kmp_runtime.cpp | 6 +++--- 6 files changed, 10 insertions(+), 29 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 4787d4b5a321..67600bebdafb 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -86,12 +86,6 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) -option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF) - -# Build host runtime library, after LIBOMPTARGET variables are set since they are needed -# to enable time profiling support in the OpenMP runtime. -add_subdirectory(runtime) - if (OPENMP_ENABLE_LIBOMPTARGET) # Check that the library can actually be built. if (APPLE OR WIN32) diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index ad36e43eccdc..016b88ba324b 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -48,10 +48,7 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on `Speedscope App`_. 
Building this feature depends on the `LLVM Support Library`_ for time trace output. Using this library is enabled by default when building using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will -be saved to the filename specified by the environment variable. For multi-threaded -applications, profiling in ``libomp`` is also needed. Setting the CMake option -``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will -turn ``libomp`` into a C++ library. +be saved to the filename specified by the environment variable. .. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 8828ff8ef455..9fdd04f41646 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -34,6 +34,7 @@ if(${OPENMP_STANDALONE_BUILD}) # Should assertions be enabled? They are on by default. set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") + set(LIBOMPTARGET_PROFILING_SUPPORT FALSE) else() # Part of LLVM build # Determine the native architecture from LLVM. string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH) @@ -65,11 +66,10 @@ else() # Part of LLVM build libomp_get_architecture(LIBOMP_ARCH) endif () set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS}) + # Time profiling support + set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING}) endif() -# Time profiling support -set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING}) - # FUJITSU A64FX is a special processor because its cache line size is 256. # We need to pass this information into kmp_config.h. 
if(LIBOMP_ARCH STREQUAL "aarch64") diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 822f9ca2b825..2e927df84f5c 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,14 +50,6 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() -# Building with time profiling support requires LLVM directory includes. -if(LIBOMP_PROFILING_SUPPORT) - include_directories( - ${LLVM_MAIN_INCLUDE_DIR} - ${LLVM_INCLUDE_DIR} - ) -endif() - # Getting correct source files to build library set(LIBOMP_CXXFILES) set(LIBOMP_ASMFILES) @@ -143,7 +135,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS) libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS) # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled. -if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING)) +if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING)) add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES}) # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}) @@ -152,8 +144,6 @@ else() LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS} LINK_COMPONENTS Support ) - # libomp must be a C++ library such that it can link libLLVMSupport - set(LIBOMP_LINKER_LANGUAGE CXX) endif() set_target_properties(omp PROPERTIES diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index f6aee7197ee8..3d682c690fc7 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -44,8 +44,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMP_PROFILING_SUPPORT -#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT +#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT +#define OMPTARGET_PROFILING_SUPPORT 
LIBOMPTARGET_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index a6e32bd008e1..4a0634d59cff 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -32,7 +32,7 @@ #include "ompt-specific.h" #endif -#if OMP_PROFILING_SUPPORT +#if OMPTARGET_PROFILING_SUPPORT #include "llvm/Support/TimeProfiler.h" static char *ProfileTraceFile = nullptr; #endif @@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) { /* ------------------------------------------------------------------------ */ void *__kmp_launch_thread(kmp_info_t *this_thr) { -#if OMP_PROFILING_SUPPORT +#if OMPTARGET_PROFILING_SUPPORT ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE"); // TODO: add a configuration option for time granularity if (ProfileTraceFile) @@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); KMP_MB(); -#if OMP_PROFILING_SUPPORT +#if OMPTARGET_PROFILING_SUPPORT llvm::timeTraceProfilerFinishThread(); #endif return this_thr; From bc2dad1671598a87423c61c355d03db49ce76907 Mon Sep 17 00:00:00 2001 From: Peter Waller Date: Tue, 26 Jan 2021 11:55:24 +0000 Subject: [PATCH 061/244] [clang][aarch64][WOA64][docs] Release note for longjmp crash with /guard:cf Add a release note workaround for PR47463. Bug: https://bugs.llvm.org/show_bug.cgi?id=47463 Differential Revision: https://reviews.llvm.org/D95435 --- clang/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9efd4c01f053..c17d84de320c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -153,6 +153,11 @@ Windows Support - Implicitly add ``.exe`` suffix for MinGW targets, even when cross compiling. (This matches a change from GCC 8.) 
+- Windows on Arm64: programs using the C standard library's setjmp and longjmp + functions may crash with a "Security check failure or stack buffer overrun" + exception. To workaround (with reduced security), compile with + /guard:cf,nolongjmp. + C Language Changes in Clang --------------------------- From 66c7b449acf402bdc87b69db5778b7b43958d217 Mon Sep 17 00:00:00 2001 From: Giorgis Georgakoudis Date: Mon, 25 Jan 2021 14:10:50 -0800 Subject: [PATCH 062/244] [OpenMP] Fix building using LLVM_ENABLE_RUNTIMES Fix when time profiling is enabled. Related to: D94855 Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D95398 (cherry picked from commit bb40e6731843de92f1c73ad6efceb8a89e045ea6) --- openmp/CMakeLists.txt | 10 +++++----- openmp/runtime/src/CMakeLists.txt | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 67600bebdafb..f89857dc98d6 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -55,11 +55,6 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") - -# Build host runtime library. -add_subdirectory(runtime) - - set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. # Since the device plugins are only supported on Linux anyway, @@ -86,6 +81,11 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) + +# Build host runtime library, after LIBOMPTARGET variables are set since they are needed +# to enable time profiling support in the OpenMP runtime. +add_subdirectory(runtime) + if (OPENMP_ENABLE_LIBOMPTARGET) # Check that the library can actually be built. 
if (APPLE OR WIN32) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 2e927df84f5c..9c5dba55b705 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,6 +50,15 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() +# Building with time profiling support for libomptarget requires +# LLVM directory includes. +if(LIBOMPTARGET_PROFILING_SUPPORT) + include_directories( + ${LLVM_MAIN_INCLUDE_DIR} + ${LLVM_INCLUDE_DIR} + ) +endif() + # Getting correct source files to build library set(LIBOMP_CXXFILES) set(LIBOMP_ASMFILES) From 92a5106e8055bab7da46095a832904444862728b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 28 Jan 2021 07:24:19 -0500 Subject: [PATCH 063/244] [OpenMP] Disabled profiling in `libomp` by default to unblock link errors Link error occurred when time profiling in libomp is enabled by default because `libomp` is assumed to be a C library but the dependence on `libLLVMSupport` for profiling is a C++ library. Currently the issue blocks all OpenMP tests in Phabricator. This patch set a new CMake option `OPENMP_ENABLE_LIBOMP_PROFILING` to enable/disable the feature. By default it is disabled. Note that once time profiling is enabled for `libomp`, it becomes a C++ library. 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95585 (cherry picked from commit c571b168349fdf22d1dc8b920bcffa3d5161f0a2) --- openmp/CMakeLists.txt | 1 + openmp/docs/design/Runtimes.rst | 5 ++++- openmp/runtime/CMakeLists.txt | 6 +++--- openmp/runtime/src/CMakeLists.txt | 9 +++++---- openmp/runtime/src/kmp_config.h.cmake | 4 ++-- openmp/runtime/src/kmp_runtime.cpp | 6 +++--- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index f89857dc98d6..b8a2822877e3 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -81,6 +81,7 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." ${ENABLE_LIBOMPTARGET}) option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget." ${ENABLE_LIBOMPTARGET}) +option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF) # Build host runtime library, after LIBOMPTARGET variables are set since they are needed # to enable time profiling support in the OpenMP runtime. diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 016b88ba324b..ad36e43eccdc 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -48,7 +48,10 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_ for time trace output. Using this library is enabled by default when building using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will -be saved to the filename specified by the environment variable. +be saved to the filename specified by the environment variable. For multi-threaded +applications, profiling in ``libomp`` is also needed. Setting the CMake option +``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will +turn ``libomp`` into a C++ library. .. 
_`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 9fdd04f41646..8828ff8ef455 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -34,7 +34,6 @@ if(${OPENMP_STANDALONE_BUILD}) # Should assertions be enabled? They are on by default. set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") - set(LIBOMPTARGET_PROFILING_SUPPORT FALSE) else() # Part of LLVM build # Determine the native architecture from LLVM. string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH) @@ -66,10 +65,11 @@ else() # Part of LLVM build libomp_get_architecture(LIBOMP_ARCH) endif () set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS}) - # Time profiling support - set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING}) endif() +# Time profiling support +set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING}) + # FUJITSU A64FX is a special processor because its cache line size is 256. # We need to pass this information into kmp_config.h. if(LIBOMP_ARCH STREQUAL "aarch64") diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 9c5dba55b705..822f9ca2b825 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -50,9 +50,8 @@ if(${LIBOMP_USE_HWLOC}) include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include) endif() -# Building with time profiling support for libomptarget requires -# LLVM directory includes. -if(LIBOMPTARGET_PROFILING_SUPPORT) +# Building with time profiling support requires LLVM directory includes. +if(LIBOMP_PROFILING_SUPPORT) include_directories( ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_INCLUDE_DIR} @@ -144,7 +143,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS) libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS) # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled. 
-if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING)) +if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING)) add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES}) # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}) @@ -153,6 +152,8 @@ else() LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS} LINK_COMPONENTS Support ) + # libomp must be a C++ library such that it can link libLLVMSupport + set(LIBOMP_LINKER_LANGUAGE CXX) endif() set_target_properties(omp PROPERTIES diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index 3d682c690fc7..f6aee7197ee8 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -44,8 +44,8 @@ #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #cmakedefine01 LIBOMP_OMPT_SUPPORT #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT -#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT -#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT +#cmakedefine01 LIBOMP_PROFILING_SUPPORT +#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT #cmakedefine01 LIBOMP_OMPT_OPTIONAL #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 4a0634d59cff..a6e32bd008e1 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -32,7 +32,7 @@ #include "ompt-specific.h" #endif -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT #include "llvm/Support/TimeProfiler.h" static char *ProfileTraceFile = nullptr; #endif @@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) { /* ------------------------------------------------------------------------ */ void *__kmp_launch_thread(kmp_info_t *this_thr) { -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT ProfileTraceFile = 
getenv("LIBOMPTARGET_PROFILE"); // TODO: add a configuration option for time granularity if (ProfileTraceFile) @@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); KMP_MB(); -#if OMPTARGET_PROFILING_SUPPORT +#if OMP_PROFILING_SUPPORT llvm::timeTraceProfilerFinishThread(); #endif return this_thr; From 72f12467ded52160d52025e13a6217f00fe25f68 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 4 Feb 2021 13:26:59 +0100 Subject: [PATCH 064/244] Add a release note about deprecating the clang-cl /fallback flag As discussed in https://lists.llvm.org/pipermail/cfe-dev/2021-January/067524.html The flag has been removed on the main branch in D95876. Differential revision: https://reviews.llvm.org/D96016 --- clang/docs/ReleaseNotes.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c17d84de320c..f4ca8a855142 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -109,6 +109,10 @@ Deprecated Compiler Flags The following options are deprecated and ignored. They will be removed in future versions of Clang. +- The clang-cl ``/fallback`` flag, which made clang-cl invoke Microsoft Visual + C++ on files it couldn't compile itself, has been deprecated. It will be + removed in Clang 13. + - ... 
Modified Compiler Flags From 4e7933905578456a30b281bbbe832d8d938feed0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 5 Feb 2021 01:40:33 +0000 Subject: [PATCH 065/244] workflows: Update libclang-abi-tests to work with minor release baselines --- .github/workflows/libclang-abi-tests.yml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index 5681c7c8166e..320a88c1d407 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -20,6 +20,7 @@ jobs: ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }} BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} + BASELINE_VERSION_MINOR: ${{ steps.vars.outputs.BASELINE_VERSION_MINOR }} LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }} LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} @@ -36,16 +37,35 @@ jobs: - name: Setup Variables id: vars run: | + minor_version=0 + remote_repo='https://github.com/llvm/llvm-project' if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then - echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + major_version=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1)) + baseline_ref="$major_version.0.0" + + # If there is a minor release, we want to use that as the base line. 
+ minor_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9] | tail -n1 | grep -o 'llvmorg-.\+' || true` + if [ -n "$minor_ref" ]; then + baseline_ref=$minor_ref + else + # Check if we have a release candidate + rc_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9]-rc* | tail -n1 | grep -o 'llvmorg-.\+' || true` + if [ -n "$rc_ref" ]; then + baseline_ref=$rc_ref + fi + fi + echo ::set-output name=BASELINE_VERSION_MAJOR::$major_version + echo ::set-output name=BASELINE_REF::$baseline_ref echo ::set-output name=ABI_HEADERS::clang-c echo ::set-output name=ABI_LIBS::libclang.so else echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + echo ::set-output name=BASELINE_REF::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.0.0 echo ::set-output name=ABI_HEADERS::. echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so fi + abi-dump: needs: abi-dump-setup runs-on: ubuntu-latest @@ -57,7 +77,7 @@ jobs: include: - name: build-baseline llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} - ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 + ref: ${{ needs.abi-dump-setup.outputs.BASELINE_REF }} repo: llvm/llvm-project - name: build-latest llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} From 81febec8a327ecbe83575ac280c2931718ab5e33 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 29 Jan 2021 12:56:23 +0100 Subject: [PATCH 066/244] [MemCpyOpt] Add test for incorrect optimization across lifetime (NFC) This only affects the MemorySSA-based implementation. 
--- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 43 ++++++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 1d2b699ee96d..5dc13ca10054 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. @@ -9,8 +9,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 -define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) { -; CHECK-LABEL: @_ZN4CordC2EOS_( +define void @call_slot(i8* nocapture dereferenceable(16) %arg1) { +; CHECK-LABEL: @call_slot( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[ARG1:%.*]], i64 7 ; CHECK-NEXT: store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1 @@ -27,4 +27,39 @@ bb: ret void } +; FIXME: Miscompile. 
+define void @memcpy_memcpy_across_lifetime(i8* noalias %p1, i8* noalias %p2, i8* noalias %p3) { +; NO_MSSA-LABEL: @memcpy_memcpy_across_lifetime( +; NO_MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; NO_MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 +; NO_MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) +; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) +; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) +; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; NO_MSSA-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) +; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false) +; NO_MSSA-NEXT: ret void +; +; MSSA-LABEL: @memcpy_memcpy_across_lifetime( +; MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 +; MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) +; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) +; MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) +; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; MSSA-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) +; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; MSSA-NEXT: ret void +; + %a = alloca [16 x i8] + %a8 = bitcast [16 x i8]* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 16, i8* %a8) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a8, i8* %p1, i64 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p2, i8* %a8, i64 16, i1 false) + call void @llvm.lifetime.end.p0i8(i64 16, i8* %a8) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p3, i8* %p2, i64 16, i1 false) + ret void +} + attributes #1 = { argmemonly nounwind } From 12a772b1a09a1b5c3f43d08c2804973506b8a859 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 31 Jan 2021 17:55:24 +0100 Subject: [PATCH 067/244] [MemorySSA] Don't treat lifetime.end as NoAlias MemorySSA currently treats lifetime.end intrinsics as not aliasing anything. This breaks MemorySSA-based MemCpyOpt, because we'll happily move a read of a pointer below a lifetime.end intrinsic, as no clobber is reported. I think the MemorySSA modelling here isn't correct: lifetime.end(p) has approximately the same effect as doing a memcpy(p, undef), and should be treated as a clobber. This patch removes the special handling of lifetime.end, leaving alias analysis to handle it appropriately. 
Differential Revision: https://reviews.llvm.org/D95763 --- llvm/lib/Analysis/MemorySSA.cpp | 26 -------------- .../Analysis/MemorySSA/lifetime-simple.ll | 9 +++-- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 36 +++++++------------ 3 files changed, 16 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index 52dca7d378e1..4722b68e20e9 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -281,7 +281,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, // clobbers where they don't really exist at all. Please see D43269 for // context. switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_end: case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: @@ -358,22 +357,6 @@ struct UpwardsMemoryQuery { } // end anonymous namespace -static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc, - BatchAAResults &AA) { - Instruction *Inst = MD->getMemoryInst(); - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - switch (II->getIntrinsicID()) { - case Intrinsic::lifetime_end: { - MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1)); - return AA.alias(ArgLoc, Loc) == MustAlias; - } - default: - return false; - } - } - return false; -} - template <typename AliasAnalysisType> static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, const Instruction *I) { @@ -1465,15 +1448,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( } MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); - // If the lifetime of the pointer ends at this instruction, it's live on - // entry.
- if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) { - // Reset UpperBound to liveOnEntryDef's place in the stack - UpperBound = 0; - FoundClobberResult = true; - LocInfo.AR = MustAlias; - break; - } ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); if (CA.IsClobber) { FoundClobberResult = true; diff --git a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll index 33327c5539f6..2d0481c18415 100644 --- a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll +++ b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll @@ -1,8 +1,7 @@ ; RUN: opt -basic-aa -print-memoryssa -verify-memoryssa -enable-new-pm=0 -analyze < %s 2>&1 | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s -; This test checks a number of things: -; First, the lifetime markers should not clobber any uses of Q or P. -; Second, the loads of P are MemoryUse(LiveOnEntry) due to the placement of the markers vs the loads. +; This test checks that lifetime markers are considered clobbers of %P, +; and due to lack of noalias information, of %Q as well.
define i8 @test(i8* %P, i8* %Q) { entry: @@ -18,10 +17,10 @@ entry: ; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 32, i8* %P) call void @llvm.lifetime.end.p0i8(i64 32, i8* %P) -; CHECK: MemoryUse(liveOnEntry) +; CHECK: MemoryUse(3) ; CHECK-NEXT: %1 = load i8, i8* %P %1 = load i8, i8* %P -; CHECK: MemoryUse(2) +; CHECK: MemoryUse(3) ; CHECK-NEXT: %2 = load i8, i8* %Q %2 = load i8, i8* %Q ret i8 %1 diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 5dc13ca10054..c7e7666307ab 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s +; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. @@ -27,29 +27,17 @@ bb: ret void } -; FIXME: Miscompile. 
define void @memcpy_memcpy_across_lifetime(i8* noalias %p1, i8* noalias %p2, i8* noalias %p3) { -; NO_MSSA-LABEL: @memcpy_memcpy_across_lifetime( -; NO_MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; NO_MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 -; NO_MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) -; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) -; NO_MSSA-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) -; NO_MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false) -; NO_MSSA-NEXT: ret void -; -; MSSA-LABEL: @memcpy_memcpy_across_lifetime( -; MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; MSSA-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 -; MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) -; MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) -; MSSA-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) -; MSSA-NEXT: ret void +; CHECK-LABEL: @memcpy_memcpy_across_lifetime( +; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]]) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false) +; CHECK-NEXT: ret void ; %a = alloca [16 x i8] %a8 = bitcast [16 x i8]* %a to i8* From 716eef9ad5b367e5cbcc22c8ac53395f9bdbe7a5 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 4 Feb 2021 20:14:14 -0500 Subject: [PATCH 068/244] [OpenMP][libomptarget] Fixed an issue that device sync is skipped if the kernel doesn't have any argument Currently if there is not kernel argument, device synchronization will be skipped. This can lead to two issues: 1. If there is any device error, it will not be captured; 2. The target region might end before the kernel is done, which is not spec conformant. The test added in this patch only runs on NVPTX platform, although it will not be executed by Phab at all. 
It also requires `not` which is not available on most systems. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D96067 (cherry picked from commit b68a6b09e60a24733b923a0fc282746a855852da) --- openmp/libomptarget/src/omptarget.cpp | 22 +++++++++++++++---- .../libomptarget/test/offloading/assert.cpp | 8 +++++++ 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 openmp/libomptarget/test/offloading/assert.cpp diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 90966d25fb26..e4b7b18bc70b 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -451,6 +451,17 @@ struct DeallocTgtPtrInfo { : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete), HasCloseModifier(HasCloseModifier) {} }; + +/// Synchronize device +static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) { + assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo"); + if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) { + REPORT("Failed to synchronize device.\n"); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} } // namespace /// Internal function to undo the mapping and retrieve the data from the device. @@ -631,11 +642,9 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, // AsyncInfo->Queue will not be nullptr, so again, we don't need to // synchronize. 
if (AsyncInfo && AsyncInfo->Queue) { - Ret = Device.synchronize(AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Failed to synchronize device.\n"); + Ret = syncDevice(Device, AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } } // Deallocate target pointer @@ -1307,6 +1316,11 @@ int target(ident_t *loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, REPORT("Failed to process data after launching the kernel.\n"); return OFFLOAD_FAIL; } + } else if (AsyncInfo.Queue) { + // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't + // have any argument, and the device supports async operations, so we need a + // sync at this point. + return syncDevice(Device, &AsyncInfo); } return OFFLOAD_SUCCESS; diff --git a/openmp/libomptarget/test/offloading/assert.cpp b/openmp/libomptarget/test/offloading/assert.cpp new file mode 100644 index 000000000000..00112dd92cc6 --- /dev/null +++ b/openmp/libomptarget/test/offloading/assert.cpp @@ -0,0 +1,8 @@ +// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda && %libomptarget-run-fail-nvptx64-nvidia-cuda + +int main(int argc, char *argv[]) { +#pragma omp target + { __builtin_trap(); } + + return 0; +} From 395ef8d5c67905646b72dd5ef2d8eb60cabb8634 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 2 Feb 2021 16:58:38 -0500 Subject: [PATCH 069/244] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20Rename=20inclu?= =?UTF-8?q?de/support=20to=20include/=5F=5Fsupport?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do ship those headers, so the directory name should not be something that can potentially conflict with user-defined directories. This is a cherry-pick of b51756819a85563ae063e98eeb3d6af8e44c8f64.
Differential Revision: https://reviews.llvm.org/D96059 --- libcxx/include/CMakeLists.txt | 38 +++++++++---------- libcxx/include/__locale | 20 +++++----- .../android/locale_bionic.h | 6 +-- .../{support => __support}/fuchsia/xlocale.h | 6 +-- .../{support => __support}/ibm/limits.h | 2 +- .../ibm/locale_mgmt_aix.h | 2 +- .../{support => __support}/ibm/nanosleep.h | 0 .../{support => __support}/ibm/support.h | 2 +- .../{support => __support}/ibm/xlocale.h | 7 ++-- .../{support => __support}/musl/xlocale.h | 2 +- .../{support => __support}/newlib/xlocale.h | 6 +-- .../{support => __support}/nuttx/xlocale.h | 6 +-- .../{support => __support}/openbsd/xlocale.h | 4 +- .../solaris/floatingpoint.h | 0 .../{support => __support}/solaris/wchar.h | 0 .../{support => __support}/solaris/xlocale.h | 0 .../win32/limits_msvc_win32.h | 2 +- .../win32/locale_win32.h | 2 +- .../xlocale/__nop_locale_mgmt.h | 2 +- .../xlocale/__posix_l_fallback.h | 2 +- .../xlocale/__strtonum_fallback.h | 2 +- libcxx/include/__threading_support | 2 +- libcxx/include/bit | 2 +- libcxx/include/limits | 4 +- libcxx/src/CMakeLists.txt | 2 +- libcxx/src/locale.cpp | 2 +- libcxx/src/support/solaris/xlocale.cpp | 2 +- libcxx/src/support/win32/locale_win32.cpp | 2 +- libcxx/src/support/win32/support.cpp | 2 +- libcxx/src/support/win32/thread_win32.cpp | 2 +- .../gn/secondary/libcxx/include/BUILD.gn | 38 +++++++++---------- 31 files changed, 85 insertions(+), 84 deletions(-) rename libcxx/include/{support => __support}/android/locale_bionic.h (90%) rename libcxx/include/{support => __support}/fuchsia/xlocale.h (74%) rename libcxx/include/{support => __support}/ibm/limits.h (97%) rename libcxx/include/{support => __support}/ibm/locale_mgmt_aix.h (96%) rename libcxx/include/{support => __support}/ibm/nanosleep.h (100%) rename libcxx/include/{support => __support}/ibm/support.h (95%) rename libcxx/include/{support => __support}/ibm/xlocale.h (97%) rename libcxx/include/{support => __support}/musl/xlocale.h (95%) 
rename libcxx/include/{support => __support}/newlib/xlocale.h (82%) rename libcxx/include/{support => __support}/nuttx/xlocale.h (70%) rename libcxx/include/{support => __support}/openbsd/xlocale.h (78%) rename libcxx/include/{support => __support}/solaris/floatingpoint.h (100%) rename libcxx/include/{support => __support}/solaris/wchar.h (100%) rename libcxx/include/{support => __support}/solaris/xlocale.h (100%) rename libcxx/include/{support => __support}/win32/limits_msvc_win32.h (96%) rename libcxx/include/{support => __support}/win32/locale_win32.h (99%) rename libcxx/include/{support => __support}/xlocale/__nop_locale_mgmt.h (94%) rename libcxx/include/{support => __support}/xlocale/__posix_l_fallback.h (98%) rename libcxx/include/{support => __support}/xlocale/__strtonum_fallback.h (96%) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 77e5e556d684..29a317b8ae9a 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -150,25 +150,25 @@ set(files string.h string_view strstream - support/android/locale_bionic.h - support/fuchsia/xlocale.h - support/ibm/limits.h - support/ibm/locale_mgmt_aix.h - support/ibm/nanosleep.h - support/ibm/support.h - support/ibm/xlocale.h - support/musl/xlocale.h - support/newlib/xlocale.h - support/nuttx/xlocale.h - support/openbsd/xlocale.h - support/solaris/floatingpoint.h - support/solaris/wchar.h - support/solaris/xlocale.h - support/win32/limits_msvc_win32.h - support/win32/locale_win32.h - support/xlocale/__nop_locale_mgmt.h - support/xlocale/__posix_l_fallback.h - support/xlocale/__strtonum_fallback.h + __support/android/locale_bionic.h + __support/fuchsia/xlocale.h + __support/ibm/limits.h + __support/ibm/locale_mgmt_aix.h + __support/ibm/nanosleep.h + __support/ibm/support.h + __support/ibm/xlocale.h + __support/musl/xlocale.h + __support/newlib/xlocale.h + __support/nuttx/xlocale.h + __support/openbsd/xlocale.h + __support/solaris/floatingpoint.h + 
__support/solaris/wchar.h + __support/solaris/xlocale.h + __support/win32/limits_msvc_win32.h + __support/win32/locale_win32.h + __support/xlocale/__nop_locale_mgmt.h + __support/xlocale/__posix_l_fallback.h + __support/xlocale/__strtonum_fallback.h system_error tgmath.h thread diff --git a/libcxx/include/__locale b/libcxx/include/__locale index a2da7d78049f..77e5faab2676 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -21,30 +21,30 @@ #include #if defined(_LIBCPP_MSVCRT_LIKE) # include -# include +# include <__support/win32/locale_win32.h> #elif defined(__NuttX__) -# include +# include <__support/nuttx/xlocale.h> #elif defined(_AIX) || defined(__MVS__) -# include +# include <__support/ibm/xlocale.h> #elif defined(__ANDROID__) -# include +# include <__support/android/locale_bionic.h> #elif defined(__sun__) # include -# include +# include <__support/solaris/xlocale.h> #elif defined(_NEWLIB_VERSION) -# include +# include <__support/newlib/xlocale.h> #elif defined(__OpenBSD__) -# include +# include <__support/openbsd/xlocale.h> #elif (defined(__APPLE__) || defined(__FreeBSD__) \ || defined(__EMSCRIPTEN__) || defined(__IBMCPP__)) # include #elif defined(__Fuchsia__) -# include +# include <__support/fuchsia/xlocale.h> #elif defined(__wasi__) // WASI libc uses musl's locales support. 
-# include +# include <__support/musl/xlocale.h> #elif defined(_LIBCPP_HAS_MUSL_LIBC) -# include +# include <__support/musl/xlocale.h> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/support/android/locale_bionic.h b/libcxx/include/__support/android/locale_bionic.h similarity index 90% rename from libcxx/include/support/android/locale_bionic.h rename to libcxx/include/__support/android/locale_bionic.h index f05a6a0522ca..8c6d4bd0dc32 100644 --- a/libcxx/include/support/android/locale_bionic.h +++ b/libcxx/include/__support/android/locale_bionic.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/android/locale_bionic.h ------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -28,13 +28,13 @@ extern "C" { #include #include #if __ANDROID_API__ < 21 -#include +#include <__support/xlocale/__posix_l_fallback.h> #endif // In NDK versions later than 16, locale-aware functions are provided by // legacy_stdlib_inlines.h #if __NDK_MAJOR__ <= 16 #if __ANDROID_API__ < 21 -#include +#include <__support/xlocale/__strtonum_fallback.h> #elif __ANDROID_API__ < 26 #if defined(__cplusplus) diff --git a/libcxx/include/support/fuchsia/xlocale.h b/libcxx/include/__support/fuchsia/xlocale.h similarity index 74% rename from libcxx/include/support/fuchsia/xlocale.h rename to libcxx/include/__support/fuchsia/xlocale.h index b86ce9efbd11..e8def81480ea 100644 --- a/libcxx/include/support/fuchsia/xlocale.h +++ b/libcxx/include/__support/fuchsia/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/fuchsia/xlocale.h ------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -14,8 +14,8 @@ #include #include -#include -#include +#include <__support/xlocale/__posix_l_fallback.h> +#include <__support/xlocale/__strtonum_fallback.h> #endif // defined(__Fuchsia__) diff --git a/libcxx/include/support/ibm/limits.h b/libcxx/include/__support/ibm/limits.h similarity index 97% rename from libcxx/include/support/ibm/limits.h rename to libcxx/include/__support/ibm/limits.h index d1c59f066a87..45f1f1e3684c 100644 --- a/libcxx/include/support/ibm/limits.h +++ b/libcxx/include/__support/ibm/limits.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------------- support/ibm/limits.h ---------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/ibm/locale_mgmt_aix.h b/libcxx/include/__support/ibm/locale_mgmt_aix.h similarity index 96% rename from libcxx/include/support/ibm/locale_mgmt_aix.h rename to libcxx/include/__support/ibm/locale_mgmt_aix.h index e452dc32529d..4f658c3eee30 100644 --- a/libcxx/include/support/ibm/locale_mgmt_aix.h +++ b/libcxx/include/__support/ibm/locale_mgmt_aix.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/ibm/locale_mgmt_aix.h --------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/support/ibm/nanosleep.h b/libcxx/include/__support/ibm/nanosleep.h similarity index 100% rename from libcxx/include/support/ibm/nanosleep.h rename to libcxx/include/__support/ibm/nanosleep.h diff --git a/libcxx/include/support/ibm/support.h b/libcxx/include/__support/ibm/support.h similarity index 95% rename from libcxx/include/support/ibm/support.h rename to libcxx/include/__support/ibm/support.h index 0569cbe7460d..a7751b017666 100644 --- a/libcxx/include/support/ibm/support.h +++ b/libcxx/include/__support/ibm/support.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===----------------------- support/ibm/support.h ----------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/ibm/xlocale.h b/libcxx/include/__support/ibm/xlocale.h similarity index 97% rename from libcxx/include/support/ibm/xlocale.h rename to libcxx/include/__support/ibm/xlocale.h index fde137cde260..ad07a255fc95 100644 --- a/libcxx/include/support/ibm/xlocale.h +++ b/libcxx/include/__support/ibm/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------------- support/ibm/xlocale.h -------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,7 +9,8 @@ #ifndef _LIBCPP_SUPPORT_IBM_XLOCALE_H #define _LIBCPP_SUPPORT_IBM_XLOCALE_H -#include + +#include <__support/ibm/locale_mgmt_aix.h> #include "cstdlib" @@ -218,7 +219,7 @@ size_t strftime_l(char *__s, size_t __size, const char *__fmt, #elif defined(__MVS__) #include // POSIX routines -#include +#include <__support/xlocale/__posix_l_fallback.h> #endif // defined(__MVS__) // The following are not POSIX routines. 
These are quick-and-dirty hacks diff --git a/libcxx/include/support/musl/xlocale.h b/libcxx/include/__support/musl/xlocale.h similarity index 95% rename from libcxx/include/support/musl/xlocale.h rename to libcxx/include/__support/musl/xlocale.h index 722d13fa1d66..2508a8e8e0ca 100644 --- a/libcxx/include/support/musl/xlocale.h +++ b/libcxx/include/__support/musl/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------- support/musl/xlocale.h ------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/newlib/xlocale.h b/libcxx/include/__support/newlib/xlocale.h similarity index 82% rename from libcxx/include/support/newlib/xlocale.h rename to libcxx/include/__support/newlib/xlocale.h index 25fa798b6d02..b75f9263a4c4 100644 --- a/libcxx/include/support/newlib/xlocale.h +++ b/libcxx/include/__support/newlib/xlocale.h @@ -17,9 +17,9 @@ #include #if !defined(__NEWLIB__) || __NEWLIB__ < 2 || \ __NEWLIB__ == 2 && __NEWLIB_MINOR__ < 5 -#include -#include -#include +#include <__support/xlocale/__nop_locale_mgmt.h> +#include <__support/xlocale/__posix_l_fallback.h> +#include <__support/xlocale/__strtonum_fallback.h> #endif #endif // _NEWLIB_VERSION diff --git a/libcxx/include/support/nuttx/xlocale.h b/libcxx/include/__support/nuttx/xlocale.h similarity index 70% rename from libcxx/include/support/nuttx/xlocale.h rename to libcxx/include/__support/nuttx/xlocale.h index b70d62005046..be738e3b64e4 100644 --- a/libcxx/include/support/nuttx/xlocale.h +++ b/libcxx/include/__support/nuttx/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/nuttx/xlocale.h -------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,8 +11,8 @@ #define _LIBCPP_SUPPORT_NUTTX_XLOCALE_H #if defined(__NuttX__) -#include -#include +#include <__support/xlocale/__posix_l_fallback.h> +#include <__support/xlocale/__strtonum_fallback.h> #endif // __NuttX__ #endif diff --git a/libcxx/include/support/openbsd/xlocale.h b/libcxx/include/__support/openbsd/xlocale.h similarity index 78% rename from libcxx/include/support/openbsd/xlocale.h rename to libcxx/include/__support/openbsd/xlocale.h index fbfaedd127c6..1136fa327fac 100644 --- a/libcxx/include/support/openbsd/xlocale.h +++ b/libcxx/include/__support/openbsd/xlocale.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/openbsd/xlocale.h -----------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -14,6 +14,6 @@ #include #include #include -#include +#include <__support/xlocale/__strtonum_fallback.h> #endif diff --git a/libcxx/include/support/solaris/floatingpoint.h b/libcxx/include/__support/solaris/floatingpoint.h similarity index 100% rename from libcxx/include/support/solaris/floatingpoint.h rename to libcxx/include/__support/solaris/floatingpoint.h diff --git a/libcxx/include/support/solaris/wchar.h b/libcxx/include/__support/solaris/wchar.h similarity index 100% rename from libcxx/include/support/solaris/wchar.h rename to libcxx/include/__support/solaris/wchar.h diff --git a/libcxx/include/support/solaris/xlocale.h b/libcxx/include/__support/solaris/xlocale.h similarity index 100% rename from libcxx/include/support/solaris/xlocale.h rename to libcxx/include/__support/solaris/xlocale.h diff --git a/libcxx/include/support/win32/limits_msvc_win32.h b/libcxx/include/__support/win32/limits_msvc_win32.h similarity index 96% rename from libcxx/include/support/win32/limits_msvc_win32.h rename to libcxx/include/__support/win32/limits_msvc_win32.h index 7bb835559a3b..758d24647b1b 100644 --- a/libcxx/include/support/win32/limits_msvc_win32.h +++ b/libcxx/include/__support/win32/limits_msvc_win32.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------------ support/win32/limits_msvc_win32.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/support/win32/locale_win32.h b/libcxx/include/__support/win32/locale_win32.h similarity index 99% rename from libcxx/include/support/win32/locale_win32.h rename to libcxx/include/__support/win32/locale_win32.h index 897c36be70c6..d32a7a8ad304 100644 --- a/libcxx/include/support/win32/locale_win32.h +++ b/libcxx/include/__support/win32/locale_win32.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------------- support/win32/locale_win32.h -------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/xlocale/__nop_locale_mgmt.h b/libcxx/include/__support/xlocale/__nop_locale_mgmt.h similarity index 94% rename from libcxx/include/support/xlocale/__nop_locale_mgmt.h rename to libcxx/include/__support/xlocale/__nop_locale_mgmt.h index f33d3894c3a9..57b18842ff45 100644 --- a/libcxx/include/support/xlocale/__nop_locale_mgmt.h +++ b/libcxx/include/__support/xlocale/__nop_locale_mgmt.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===------------ support/xlocale/__nop_locale_mgmt.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/support/xlocale/__posix_l_fallback.h b/libcxx/include/__support/xlocale/__posix_l_fallback.h similarity index 98% rename from libcxx/include/support/xlocale/__posix_l_fallback.h rename to libcxx/include/__support/xlocale/__posix_l_fallback.h index f3df6c46fbab..00d69d19e8c8 100644 --- a/libcxx/include/support/xlocale/__posix_l_fallback.h +++ b/libcxx/include/__support/xlocale/__posix_l_fallback.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===--------------- support/xlocale/__posix_l_fallback.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/include/support/xlocale/__strtonum_fallback.h b/libcxx/include/__support/xlocale/__strtonum_fallback.h similarity index 96% rename from libcxx/include/support/xlocale/__strtonum_fallback.h rename to libcxx/include/__support/xlocale/__strtonum_fallback.h index df38598056a6..1172a5d57236 100644 --- a/libcxx/include/support/xlocale/__strtonum_fallback.h +++ b/libcxx/include/__support/xlocale/__strtonum_fallback.h @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------- support/xlocale/__strtonum_fallback.h -----------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support index 473c9c3bbe49..de572f3ff84d 100644 --- a/libcxx/include/__threading_support +++ b/libcxx/include/__threading_support @@ -17,7 +17,7 @@ #include #ifdef __MVS__ -# include +# include <__support/ibm/nanosleep.h> #endif #ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER diff --git a/libcxx/include/bit b/libcxx/include/bit index fe360179c5ca..f8c37c3d6bbf 100644 --- a/libcxx/include/bit +++ b/libcxx/include/bit @@ -62,7 +62,7 @@ namespace std { #include <__debug> #if defined(__IBMCPP__) -#include "support/ibm/support.h" +#include "__support/ibm/support.h" #endif #if defined(_LIBCPP_COMPILER_MSVC) #include diff --git a/libcxx/include/limits b/libcxx/include/limits index 6d5d1e1aca75..8f97cd10a8b1 100644 --- a/libcxx/include/limits +++ b/libcxx/include/limits @@ -105,11 +105,11 @@ template<> class numeric_limits; #include #if defined(_LIBCPP_COMPILER_MSVC) -#include "support/win32/limits_msvc_win32.h" +#include "__support/win32/limits_msvc_win32.h" #endif // _LIBCPP_MSVCRT #if defined(__IBMCPP__) -#include "support/ibm/limits.h" +#include "__support/ibm/limits.h" #endif // __IBMCPP__ #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index c482068fa99a..9965104cb5b2 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -107,7 +107,7 @@ endif() if (LIBCXX_CONFIGURE_IDE) file(GLOB_RECURSE LIBCXX_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/*) if(WIN32) - file( GLOB LIBCXX_WIN32_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/support/win32/*.h) + file( GLOB LIBCXX_WIN32_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/__support/win32/*.h) list(APPEND LIBCXX_HEADERS ${LIBCXX_WIN32_HEADERS}) endif() # Force them all into the headers dir on MSVC, otherwise they end up at diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index f109389f68f3..a0209d0ce8cf 100644 --- a/libcxx/src/locale.cpp +++ 
b/libcxx/src/locale.cpp @@ -29,7 +29,7 @@ #include "cwctype" #include "__sso_allocator" #if defined(_LIBCPP_MSVCRT) || defined(__MINGW32__) -#include "support/win32/locale_win32.h" +#include "__support/win32/locale_win32.h" #elif !defined(__BIONIC__) && !defined(__NuttX__) #include #endif diff --git a/libcxx/src/support/solaris/xlocale.cpp b/libcxx/src/support/solaris/xlocale.cpp index d68a39f4dfe5..d25adcd21d30 100644 --- a/libcxx/src/support/solaris/xlocale.cpp +++ b/libcxx/src/support/solaris/xlocale.cpp @@ -8,7 +8,7 @@ #ifdef __sun__ -#include "support/solaris/xlocale.h" +#include "__support/solaris/xlocale.h" #include #include #include diff --git a/libcxx/src/support/win32/locale_win32.cpp b/libcxx/src/support/win32/locale_win32.cpp index b7062db352ad..e7c6005fc1a3 100644 --- a/libcxx/src/support/win32/locale_win32.cpp +++ b/libcxx/src/support/win32/locale_win32.cpp @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/win32/locale_win32.cpp ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/libcxx/src/support/win32/support.cpp b/libcxx/src/support/win32/support.cpp index d156e02e3e84..52453f547926 100644 --- a/libcxx/src/support/win32/support.cpp +++ b/libcxx/src/support/win32/support.cpp @@ -1,5 +1,5 @@ // -*- C++ -*- -//===----------------------- support/win32/support.h ----------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libcxx/src/support/win32/thread_win32.cpp b/libcxx/src/support/win32/thread_win32.cpp index 83e7e9f6ce5b..35c4c871457d 100644 --- a/libcxx/src/support/win32/thread_win32.cpp +++ b/libcxx/src/support/win32/thread_win32.cpp @@ -1,5 +1,5 @@ // -*- C++ -*- -//===-------------------- support/win32/thread_win32.cpp ------------------===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 644f0a767558..2ca495b08fba 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -217,25 +217,25 @@ copy("include") { "string.h", "string_view", "strstream", - "support/android/locale_bionic.h", - "support/fuchsia/xlocale.h", - "support/ibm/limits.h", - "support/ibm/locale_mgmt_aix.h", - "support/ibm/nanosleep.h", - "support/ibm/support.h", - "support/ibm/xlocale.h", - "support/musl/xlocale.h", - "support/newlib/xlocale.h", - "support/nuttx/xlocale.h", - "support/openbsd/xlocale.h", - "support/solaris/floatingpoint.h", - "support/solaris/wchar.h", - "support/solaris/xlocale.h", - "support/win32/limits_msvc_win32.h", - "support/win32/locale_win32.h", - "support/xlocale/__nop_locale_mgmt.h", - "support/xlocale/__posix_l_fallback.h", - "support/xlocale/__strtonum_fallback.h", + "__support/android/locale_bionic.h", + "__support/fuchsia/xlocale.h", + "__support/ibm/limits.h", + "__support/ibm/locale_mgmt_aix.h", + "__support/ibm/nanosleep.h", + "__support/ibm/support.h", + "__support/ibm/xlocale.h", + "__support/musl/xlocale.h", + "__support/newlib/xlocale.h", + "__support/nuttx/xlocale.h", + "__support/openbsd/xlocale.h", + "__support/solaris/floatingpoint.h", + "__support/solaris/wchar.h", + "__support/solaris/xlocale.h", + 
"__support/win32/limits_msvc_win32.h", + "__support/win32/locale_win32.h", + "__support/xlocale/__nop_locale_mgmt.h", + "__support/xlocale/__posix_l_fallback.h", + "__support/xlocale/__strtonum_fallback.h", "system_error", "tgmath.h", "thread", From bc39d53d9a4f1ed7c903648f3fd408296fd55c95 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 1 Feb 2021 15:18:42 -0800 Subject: [PATCH 070/244] =?UTF-8?q?[=F0=9F=8D=92]Disable=20CFI=20in=20=5F?= =?UTF-8?q?=5Fget=5Felem=20to=20allow=20casting=20a=20pointer=20to=20unini?= =?UTF-8?q?tialized=20memory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes usage of shared_ptr with CFI enabled, which is llvm.org/pr48993. (cherry pick of commit bab74864168bb5e28ecbc0294fe1095d8da7f569) Differential Revision: https://reviews.llvm.org/D96063 --- libcxx/include/memory | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/include/memory b/libcxx/include/memory index a00916c8c03f..39d0f5bee6a5 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -2647,7 +2647,7 @@ private: _Alloc *__alloc = reinterpret_cast<_Alloc*>(__first); return __alloc; } - _Tp* __get_elem() _NOEXCEPT { + _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT { _CompressedPair *__as_pair = reinterpret_cast<_CompressedPair*>(__blob_); typename _CompressedPair::_Base2* __second = _CompressedPair::__get_second_base(__as_pair); _Tp *__elem = reinterpret_cast<_Tp*>(__second); From 251f3295b498b699aa2b926167a788a6b6dbc033 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 3 Feb 2021 17:00:20 -0500 Subject: [PATCH 071/244] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20Fix=20libcxx?= =?UTF-8?q?=20build=20on=2032bit=20architectures=20with=2064bit=20time=5Ft?= =?UTF-8?q?=20defaults=20e.g.=20riscv32?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by Khem Raj. 
(cherry pick of commit 85b9c5ccc172a1e61c7ecaaec4752587cb6f1e26) Differential Revision: https://reviews.llvm.org/D96062 --- libcxx/src/atomic.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp index 6b73ed771cd1..9ae1fb5199bf 100644 --- a/libcxx/src/atomic.cpp +++ b/libcxx/src/atomic.cpp @@ -19,6 +19,12 @@ #include #include +// libc++ uses SYS_futex as a universal syscall name. However, on 32 bit architectures +// with a 64 bit time_t, we need to specify SYS_futex_time64. +#if !defined(SYS_futex) && defined(SYS_futex_time64) +# define SYS_futex SYS_futex_time64 +#endif + #else // <- Add other operating systems here // Baseline needs no new headers From d7d818c3615e4ff6bb283df0c1ddbb2b2cd50075 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Wed, 27 Jan 2021 13:02:45 -0800 Subject: [PATCH 072/244] Fix runInTerminal failures on Windows stella.stamenova reported failures on Windows for this test in https://reviews.llvm.org/D93951. I'm fixing the macro definitions and disabling the tests for python versions lower than 3.7. I'll figure out the actual issue with python3.6 after the buildbots are fine again.
(cherry picked from commit ab5591e1d8f5abcfa9e75193d3e8a29087b61425) --- .../runInTerminal/TestVSCode_runInTerminal.py | 34 +++++++++++++++---- lldb/tools/lldb-vscode/FifoFiles.cpp | 10 +++--- lldb/tools/lldb-vscode/FifoFiles.h | 1 + lldb/tools/lldb-vscode/lldb-vscode.cpp | 4 +-- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py index 055b5a5bed87..047cc317596f 100644 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py @@ -33,20 +33,30 @@ def readErrorMessage(self, fifo_file): with open(fifo_file, "r") as file: return file.readline() + def isTestSupported(self): + # For some strange reason, this test fails on python3.6 + if not (sys.version_info.major == 3 and sys.version_info.minor >= 7): + return False + try: + # We skip this test for debug builds because it takes too long parsing lldb's own + # debug info. Release builds are fine. + # Checking the size of the lldb-vscode binary seems to be a decent proxy for a quick + # detection. It should be far less than 1 MB in Release builds. + if os.path.getsize(os.environ["LLDBVSCODE_EXEC"]) < 1000000: + return True + except: + return False + @skipIfWindows @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_runInTerminal(self): + if not self.isTestSupported(): + return ''' Tests the "runInTerminal" reverse request. It makes sure that the IDE can launch the inferior with the correct environment variables and arguments. ''' - if "debug" in str(os.environ["LLDBVSCODE_EXEC"]).lower(): - # We skip this test for debug builds because it takes too long parsing lldb's own - # debug info. Release builds are fine. 
- # Checking this environment variable seems to be a decent proxy for a quick - # detection - return program = self.getBuildArtifact("a.out") source = 'main.c' self.build_and_launch( @@ -77,6 +87,8 @@ def test_runInTerminal(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_runInTerminalInvalidTarget(self): + if not self.isTestSupported(): + return self.build_and_create_debug_adaptor() response = self.launch( "INVALIDPROGRAM", stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"], expectFailure=True) @@ -88,6 +100,8 @@ def test_runInTerminalInvalidTarget(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_missingArgInRunInTerminalLauncher(self): + if not self.isTestSupported(): + return proc = subprocess.run([self.lldbVSCodeExec, "--launch-target", "INVALIDPROGRAM"], capture_output=True, universal_newlines=True) self.assertTrue(proc.returncode != 0) @@ -97,6 +111,8 @@ def test_missingArgInRunInTerminalLauncher(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) @@ -115,6 +131,8 @@ def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) @@ -132,6 +150,8 @@ def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self): @skipIfRemote @skipIf(archs=no_match(['x86_64'])) def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) @@ -150,6 +170,8 @@ def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self): @skipIfRemote 
@skipIf(archs=no_match(['x86_64'])) def test_NonAttachedRunInTerminalLauncher(self): + if not self.isTestSupported(): + return comm_file = os.path.join(self.getBuildDir(), "comm-file") os.mkfifo(comm_file) diff --git a/lldb/tools/lldb-vscode/FifoFiles.cpp b/lldb/tools/lldb-vscode/FifoFiles.cpp index b69970ec0168..0a36c87d4a94 100644 --- a/lldb/tools/lldb-vscode/FifoFiles.cpp +++ b/lldb/tools/lldb-vscode/FifoFiles.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -#if !defined(WIN32) +#include "FifoFiles.h" + +#if LLVM_ON_UNIX #include #include #include @@ -21,8 +23,6 @@ #include "lldb/lldb-defines.h" -#include "FifoFiles.h" - using namespace llvm; namespace lldb_vscode { @@ -30,13 +30,13 @@ namespace lldb_vscode { FifoFile::FifoFile(StringRef path) : m_path(path) {} FifoFile::~FifoFile() { -#if !defined(WIN32) +#if LLVM_ON_UNIX unlink(m_path.c_str()); #endif }; Expected> CreateFifoFile(StringRef path) { -#if defined(WIN32) +#if !LLVM_ON_UNIX return createStringError(inconvertibleErrorCode(), "Unimplemented"); #else if (int err = mkfifo(path.data(), 0600)) diff --git a/lldb/tools/lldb-vscode/FifoFiles.h b/lldb/tools/lldb-vscode/FifoFiles.h index 891b6f574601..f186f65e86c4 100644 --- a/lldb/tools/lldb-vscode/FifoFiles.h +++ b/lldb/tools/lldb-vscode/FifoFiles.h @@ -9,6 +9,7 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H #define LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H +#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/Support/Error.h" #include "JSONUtils.h" diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index c581b9b4a9a0..69eb2e70aa6d 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -3002,8 +3002,8 @@ static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) { // emitted to the debug adaptor. 
void LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, llvm::StringRef comm_file, char *argv[]) { -#if defined(WIN_32) - llvm::errs() << "runInTerminal is not supported on Windows\n"; +#if !LLVM_ON_UNIX + llvm::errs() << "runInTerminal is only supported on POSIX systems\n"; exit(EXIT_FAILURE); #else RunInTerminalLauncherCommChannel comm_channel(comm_file); From 27aff2aa2ade9d78d0081445eadacd5b5006143e Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Thu, 28 Jan 2021 09:24:30 -0800 Subject: [PATCH 073/244] Fix lldb-vscode builds on Windows targeting POSIX @stella.stamenova found out that lldb-vscode's Win32 macros were failing when building on Windows targeting POSIX platforms. I'm changing these macros to LLVM_ON_UNIX, which should be more accurate. (cherry picked from commit 0bca9a7ce2eeaa9f1d732ffbc17769560a2b236e) --- lldb/tools/lldb-vscode/IOStream.cpp | 6 +++--- lldb/tools/lldb-vscode/IOStream.h | 4 +++- lldb/tools/lldb-vscode/RunInTerminal.cpp | 6 +++--- lldb/tools/lldb-vscode/VSCode.cpp | 4 ++-- lldb/tools/lldb-vscode/VSCode.h | 2 ++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 11 ++++++----- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp index 4b11b90b4c2e..fdbfb554aedb 100644 --- a/lldb/tools/lldb-vscode/IOStream.cpp +++ b/lldb/tools/lldb-vscode/IOStream.cpp @@ -8,7 +8,7 @@ #include "IOStream.h" -#if defined(_WIN32) +#if !LLVM_ON_UNIX #include #else #include @@ -33,7 +33,7 @@ StreamDescriptor::~StreamDescriptor() { return; if (m_is_socket) -#if defined(_WIN32) +#if !LLVM_ON_UNIX ::closesocket(m_socket); #else ::close(m_socket); @@ -108,7 +108,7 @@ bool InputStream::read_full(std::ofstream *log, size_t length, } if (bytes_read < 0) { int reason = 0; -#if defined(_WIN32) +#if !LLVM_ON_UNIX if (descriptor.m_is_socket) reason = WSAGetLastError(); else diff --git a/lldb/tools/lldb-vscode/IOStream.h b/lldb/tools/lldb-vscode/IOStream.h index
603ae9adcc2a..1ec7ac3ed0f9 100644 --- a/lldb/tools/lldb-vscode/IOStream.h +++ b/lldb/tools/lldb-vscode/IOStream.h @@ -9,7 +9,9 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H #define LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H -#if defined(_WIN32) +#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX + +#if !LLVM_ON_UNIX // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because diff --git a/lldb/tools/lldb-vscode/RunInTerminal.cpp b/lldb/tools/lldb-vscode/RunInTerminal.cpp index 4db2806924ca..29edf5ca381d 100644 --- a/lldb/tools/lldb-vscode/RunInTerminal.cpp +++ b/lldb/tools/lldb-vscode/RunInTerminal.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -#if !defined(WIN32) +#include "RunInTerminal.h" + +#if LLVM_ON_UNIX #include #include #include @@ -21,8 +23,6 @@ #include "lldb/lldb-defines.h" -#include "RunInTerminal.h" - using namespace llvm; namespace lldb_vscode { diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index e9fdc17f4147..4d0e281c1b8d 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -14,7 +14,7 @@ #include "VSCode.h" #include "llvm/Support/FormatVariadic.h" -#if defined(_WIN32) +#if !LLVM_ON_UNIX #define NOMINMAX #include #include @@ -41,7 +41,7 @@ VSCode::VSCode() stop_at_entry(false), is_attach(false), reverse_request_seq(0), waiting_for_run_in_terminal(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); -#if defined(_WIN32) +#if !LLVM_ON_UNIX // Windows opens stdout and stdin in text mode which converts \n to 13,10 // while the value is just 10 on Darwin/Linux. Setting the file mode to binary // fixes this. 
diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 8e7dfc078934..a2e1cac8ecf9 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,6 +9,8 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H +#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX + #include #include #include diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 69eb2e70aa6d..b7f39cbb1cb5 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "VSCode.h" + #include #include #include @@ -14,7 +16,7 @@ #include #include #include -#if defined(_WIN32) +#if !LLVM_ON_UNIX // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because @@ -52,9 +54,8 @@ #include "JSONUtils.h" #include "LLDBUtils.h" -#include "VSCode.h" -#if defined(_WIN32) +#if !LLVM_ON_UNIX #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif @@ -131,7 +132,7 @@ SOCKET AcceptConnection(int portno) { *g_vsc.log << "error: accept (" << strerror(errno) << ")" << std::endl; } -#if defined(_WIN32) +#if !LLVM_ON_UNIX closesocket(sockfd); #else close(sockfd); @@ -3084,7 +3085,7 @@ int main(int argc, char *argv[]) { } } -#if !defined(_WIN32) +#if LLVM_ON_UNIX if (input_args.hasArg(OPT_wait_for_debugger)) { printf("Paused waiting for debugger to attach (pid = %i)...\n", getpid()); pause(); From 1cb6551edb94eea1fc087b346b1e8d13775dc692 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Thu, 4 Feb 2021 10:07:07 -0800 Subject: [PATCH 074/244] [lldb-vscode] correctly use Windows macros @mstorsjo found a mistake that I made when trying to fix some Windows compilation errors encountered by @stella.stamenova. 
I was incorrectly using the LLVM_ON_UNIX macro. In any case, proper use of #if defined(_WIN32) should be the actual fix. Differential Revision: https://reviews.llvm.org/D96060 (cherry picked from commit 36496cc2992d6fa26e6024971efcfc7d15f69888) --- lldb/tools/lldb-vscode/FifoFiles.cpp | 6 +++--- lldb/tools/lldb-vscode/IOStream.cpp | 6 +++--- lldb/tools/lldb-vscode/IOStream.h | 2 +- lldb/tools/lldb-vscode/RunInTerminal.cpp | 2 +- lldb/tools/lldb-vscode/VSCode.cpp | 4 ++-- lldb/tools/lldb-vscode/lldb-vscode.cpp | 10 +++++----- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lldb/tools/lldb-vscode/FifoFiles.cpp b/lldb/tools/lldb-vscode/FifoFiles.cpp index 0a36c87d4a94..4b14fb16f96c 100644 --- a/lldb/tools/lldb-vscode/FifoFiles.cpp +++ b/lldb/tools/lldb-vscode/FifoFiles.cpp @@ -8,7 +8,7 @@ #include "FifoFiles.h" -#if LLVM_ON_UNIX +#if !defined(_WIN32) #include #include #include @@ -30,13 +30,13 @@ namespace lldb_vscode { FifoFile::FifoFile(StringRef path) : m_path(path) {} FifoFile::~FifoFile() { -#if LLVM_ON_UNIX +#if !defined(_WIN32) unlink(m_path.c_str()); #endif }; Expected> CreateFifoFile(StringRef path) { -#if !LLVM_ON_UNIX +#if defined(_WIN32) return createStringError(inconvertibleErrorCode(), "Unimplemented"); #else if (int err = mkfifo(path.data(), 0600)) diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp index fdbfb554aedb..cd22d906c14c 100644 --- a/lldb/tools/lldb-vscode/IOStream.cpp +++ b/lldb/tools/lldb-vscode/IOStream.cpp @@ -8,7 +8,7 @@ #include "IOStream.h" -#if !LLVM_ON_UNIX +#if defined(_WIN32) #include #else #include @@ -33,7 +33,7 @@ StreamDescriptor::~StreamDescriptor() { return; if (m_is_socket) -#if !LLVM_ON_UNIX +#if defined(_WIN32) ::closesocket(m_socket); #else ::close(m_socket); @@ -108,7 +108,7 @@ bool InputStream::read_full(std::ofstream *log, size_t length, } if (bytes_read < 0) { int reason = 0; -#if !LLVM_ON_UNIX +#if defined(_WIN32) if (descriptor.m_is_socket) reason = 
WSAGetLastError(); else diff --git a/lldb/tools/lldb-vscode/IOStream.h b/lldb/tools/lldb-vscode/IOStream.h index 1ec7ac3ed0f9..0eb9b6fefb0d 100644 --- a/lldb/tools/lldb-vscode/IOStream.h +++ b/lldb/tools/lldb-vscode/IOStream.h @@ -11,7 +11,7 @@ #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX -#if !LLVM_ON_UNIX +#if defined(_WIN32) // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because diff --git a/lldb/tools/lldb-vscode/RunInTerminal.cpp b/lldb/tools/lldb-vscode/RunInTerminal.cpp index 29edf5ca381d..2126563d9e96 100644 --- a/lldb/tools/lldb-vscode/RunInTerminal.cpp +++ b/lldb/tools/lldb-vscode/RunInTerminal.cpp @@ -8,7 +8,7 @@ #include "RunInTerminal.h" -#if LLVM_ON_UNIX +#if !defined(_WIN32) #include #include #include diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index 4d0e281c1b8d..e9fdc17f4147 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -14,7 +14,7 @@ #include "VSCode.h" #include "llvm/Support/FormatVariadic.h" -#if !LLVM_ON_UNIX +#if defined(_WIN32) #define NOMINMAX #include #include @@ -41,7 +41,7 @@ VSCode::VSCode() stop_at_entry(false), is_attach(false), reverse_request_seq(0), waiting_for_run_in_terminal(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); -#if !LLVM_ON_UNIX +#if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 // while the value is just 10 on Darwin/Linux. Setting the file mode to binary // fixes this. 
diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index b7f39cbb1cb5..9469690cd7db 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -16,7 +16,7 @@ #include #include #include -#if !LLVM_ON_UNIX +#if defined(_WIN32) // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. // We also need to #undef GetObject (which is defined to GetObjectW) because @@ -55,7 +55,7 @@ #include "JSONUtils.h" #include "LLDBUtils.h" -#if !LLVM_ON_UNIX +#if defined(_WIN32) #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif @@ -132,7 +132,7 @@ SOCKET AcceptConnection(int portno) { *g_vsc.log << "error: accept (" << strerror(errno) << ")" << std::endl; } -#if !LLVM_ON_UNIX +#if defined(_WIN32) closesocket(sockfd); #else close(sockfd); @@ -3003,7 +3003,7 @@ static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) { // emitted to the debug adaptor. void LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg, llvm::StringRef comm_file, char *argv[]) { -#if !LLVM_ON_UNIX +#if defined(_WIN32) llvm::errs() << "runInTerminal is only supported on POSIX systems\n"; exit(EXIT_FAILURE); #else @@ -3085,7 +3085,7 @@ int main(int argc, char *argv[]) { } } -#if LLVM_ON_UNIX +#if !defined(_WIN32) if (input_args.hasArg(OPT_wait_for_debugger)) { printf("Paused waiting for debugger to attach (pid = %i)...\n", getpid()); pause(); From c9fb4a947e32abfaa73b0b91a58ef71c73316322 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Thu, 4 Feb 2021 17:00:09 -0800 Subject: [PATCH 075/244] [AST] Update LVal before evaluating lambda decl fields. 
Differential Revision: https://reviews.llvm.org/D96092 (cherry picked from commit 96fb49c3ff8e08680127ddd4ec45a0e6c199243b) --- clang/lib/AST/ExprConstant.cpp | 8 +++++++- clang/test/SemaCXX/constant-expression-cxx2a.cpp | 10 ++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 56181bbe1166..1c4caa2c1fc0 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -10009,6 +10009,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { auto *CaptureInitIt = E->capture_init_begin(); const LambdaCapture *CaptureIt = ClosureClass->captures_begin(); bool Success = true; + const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(ClosureClass); for (const auto *Field : ClosureClass->fields()) { assert(CaptureInitIt != E->capture_init_end()); // Get the initializer for this field @@ -10019,8 +10020,13 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) { if (!CurFieldInit) return Error(E); + LValue Subobject = This; + + if (!HandleLValueMember(Info, E, Subobject, Field, &Layout)) + return false; + APValue &FieldVal = Result.getStructField(Field->getFieldIndex()); - if (!EvaluateInPlace(FieldVal, Info, This, CurFieldInit)) { + if (!EvaluateInPlace(FieldVal, Info, Subobject, CurFieldInit)) { if (!Info.keepEvaluatingAfterFailure()) return false; Success = false; diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp index 4adadc9988ab..86020a09db44 100644 --- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp @@ -1437,3 +1437,13 @@ constexpr bool destroy_at_test() { return true; } static_assert(destroy_at_test()); + +namespace PR48582 { + struct S { + void *p = this; + constexpr S() {} + constexpr S(const S&) {} + }; + constexpr bool b = [a = S(), b = S()] { return a.p == b.p; }(); + static_assert(!b); +} From 
8153dee37272a73b1ed74ac1bc12422fac8ef033 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 8 Feb 2021 17:58:05 -0800 Subject: [PATCH 076/244] PR48606: The lifetime of a constexpr heap allocation always started during the same evaluation. It looks like the only case for which this matters is determining whether mutable subobjects of a heap allocation can be modified during constant evaluation. (cherry picked from commit 21e8bb83253e1a2f4b6fad9b53cafe8c530a38e2) --- clang/lib/AST/ExprConstant.cpp | 4 +-- .../test/SemaCXX/cxx2a-constexpr-dynalloc.cpp | 34 +++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1c4caa2c1fc0..cd2b5141ebe8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3497,8 +3497,8 @@ static bool diagnoseMutableFields(EvalInfo &Info, const Expr *E, AccessKinds AK, static bool lifetimeStartedInEvaluation(EvalInfo &Info, APValue::LValueBase Base, bool MutableSubobject = false) { - // A temporary we created. - if (Base.getCallIndex()) + // A temporary or transient heap allocation we created. 
+ if (Base.getCallIndex() || Base.is()) return true; switch (Info.IsEvaluatingDecl) { diff --git a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp index 3647526ff0af..097ca00640e9 100644 --- a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp +++ b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp @@ -176,3 +176,37 @@ constexpr bool construct_after_lifetime_2() { return true; } static_assert(construct_after_lifetime_2()); // expected-error {{}} expected-note {{in call}} + +namespace PR48606 { + struct A { mutable int n = 0; }; + + constexpr bool f() { + A a; + A *p = &a; + p->~A(); + std::construct_at(p); + return true; + } + static_assert(f()); + + constexpr bool g() { + A *p = new A; + p->~A(); + std::construct_at(p); + delete p; + return true; + } + static_assert(g()); + + constexpr bool h() { + std::allocator alloc; + A *p = alloc.allocate(1); + std::construct_at(p); + p->~A(); + std::construct_at(p); + p->~A(); + alloc.deallocate(p); + return true; + } + static_assert(h()); +} From b46924ee5afe234526220c29a497794bf65f8f7f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 27 Jan 2021 10:14:54 +0000 Subject: [PATCH 077/244] Fix "not all control paths return a value" warning. NFCI. 
--- clang/lib/Basic/ProfileList.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp index 56bc37a79301..2cb05c1c3c07 100644 --- a/clang/lib/Basic/ProfileList.cpp +++ b/clang/lib/Basic/ProfileList.cpp @@ -82,6 +82,7 @@ static StringRef getSectionName(CodeGenOptions::ProfileInstrKind Kind) { case CodeGenOptions::ProfileCSIRInstr: return "csllvm"; } + llvm_unreachable("Unhandled CodeGenOptions::ProfileInstrKind enum"); } llvm::Optional From 8d20c14a8a3dd0f83d4066f957ba4c006d29942b Mon Sep 17 00:00:00 2001 From: Nathan James Date: Fri, 12 Feb 2021 16:55:44 +0000 Subject: [PATCH 078/244] [clangd] Fix clang tidy provider when multiple config files exist in directory tree Currently Clang tidy provider searches from the root directory up to the target directory, this is the opposite of how clang-tidy searches for config files. The result of this is .clang-tidy files are ignored in any subdirectory of a directory containing a .clang-tidy file. 
Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D96204 (cherry picked from commit ba3ea9c60f0f259f0ccc47e47daf8253a5885531) --- clang-tools-extra/clangd/TidyProvider.cpp | 2 +- .../clangd/unittests/CMakeLists.txt | 1 + .../clangd/unittests/TidyProviderTests.cpp | 60 +++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 clang-tools-extra/clangd/unittests/TidyProviderTests.cpp diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp index c26c59fd347d..bcf1cd5a6183 100644 --- a/clang-tools-extra/clangd/TidyProvider.cpp +++ b/clang-tools-extra/clangd/TidyProvider.cpp @@ -106,7 +106,7 @@ class DotClangTidyTree { llvm::SmallVector Caches; { std::lock_guard Lock(Mu); - for (auto I = path::begin(Parent), E = path::end(Parent); I != E; ++I) { + for (auto I = path::rbegin(Parent), E = path::rend(Parent); I != E; ++I) { assert(I->end() >= Parent.begin() && I->end() <= Parent.end() && "Canonical path components should be substrings"); llvm::StringRef Ancestor(Parent.begin(), I->end() - Parent.begin()); diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt index adf4ac827cce..f4d364720eaf 100644 --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -93,6 +93,7 @@ add_unittest(ClangdUnitTests ClangdTests TestIndex.cpp TestTU.cpp TestWorkspace.cpp + TidyProviderTests.cpp TypeHierarchyTests.cpp URITests.cpp XRefsTests.cpp diff --git a/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp b/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp new file mode 100644 index 000000000000..a16c87456a1a --- /dev/null +++ b/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp @@ -0,0 +1,60 @@ +//===-- TidyProviderTests.cpp - Clang tidy configuration provider tests ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestFS.h" +#include "TidyProvider.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { + +namespace { + +TEST(TidyProvider, NestedDirectories) { + MockFS FS; + FS.Files[testPath(".clang-tidy")] = R"yaml( + Checks: 'llvm-*' + CheckOptions: + - key: TestKey + value: 1 +)yaml"; + FS.Files[testPath("sub1/.clang-tidy")] = R"yaml( + Checks: 'misc-*' + CheckOptions: + - key: TestKey + value: 2 +)yaml"; + FS.Files[testPath("sub1/sub2/.clang-tidy")] = R"yaml( + Checks: 'bugprone-*' + CheckOptions: + - key: TestKey + value: 3 + InheritParentConfig: true +)yaml"; + + TidyProvider Provider = provideClangTidyFiles(FS); + + auto BaseOptions = getTidyOptionsForFile(Provider, testPath("File.cpp")); + ASSERT_TRUE(BaseOptions.Checks.hasValue()); + EXPECT_EQ(*BaseOptions.Checks, "llvm-*"); + EXPECT_EQ(BaseOptions.CheckOptions.lookup("TestKey").Value, "1"); + + auto Sub1Options = getTidyOptionsForFile(Provider, testPath("sub1/File.cpp")); + ASSERT_TRUE(Sub1Options.Checks.hasValue()); + EXPECT_EQ(*Sub1Options.Checks, "misc-*"); + EXPECT_EQ(Sub1Options.CheckOptions.lookup("TestKey").Value, "2"); + + auto Sub2Options = + getTidyOptionsForFile(Provider, testPath("sub1/sub2/File.cpp")); + ASSERT_TRUE(Sub2Options.Checks.hasValue()); + EXPECT_EQ(*Sub2Options.Checks, "misc-*,bugprone-*"); + EXPECT_EQ(Sub2Options.CheckOptions.lookup("TestKey").Value, "3"); +} +} // namespace +} // namespace clangd +} // namespace clang From 6604c3050948d602ef24b3d3efbf9f4410494833 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 2 Feb 2021 14:21:33 -0800 Subject: [PATCH 079/244] [GlobalISel] Check if branches use the same MBB in matchOptBrCondByInvertingCond If the G_BR + G_BRCOND in this combine use the same MBB, then it will infinite loop. 
Don't allow that to happen. Differential Revision: https://reviews.llvm.org/D95895 (cherry picked from commit 02d4b365bf4f8c2cb56e5612902f6c3bb4316493) --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 9 +++---- .../GlobalISel/prelegalizercombiner-br.mir | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index df0219fcfa64..a9353bdfb780 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -968,10 +968,11 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) { if (BrCond->getOpcode() != TargetOpcode::G_BRCOND) return false; - // Check that the next block is the conditional branch target. - if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB())) - return false; - return true; + // Check that the next block is the conditional branch target. Also make sure + // that it isn't the same as the G_BR's target (otherwise, this will loop.) + MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB(); + return BrCondTarget != MI.getOperand(0).getMBB() && + MBB->isLayoutSuccessor(BrCondTarget); } void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir index 0631ff89ade0..0647de44c4b8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir @@ -29,6 +29,7 @@ ret i32 %retval.0 } + define void @dont_combine_same_block() { ret void } ... --- @@ -87,3 +88,26 @@ body: | RET_ReallyLR implicit $w0 ... 
+--- +name: dont_combine_same_block +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dont_combine_same_block + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $w0, $w1 + ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF + ; CHECK: G_BRCOND %cond(s1), %bb.1 + ; CHECK: G_BR %bb.1 + ; CHECK: bb.1: + ; CHECK: RET_ReallyLR + bb.0: + liveins: $w0, $w1 + %cond:_(s1) = G_IMPLICIT_DEF + + ; The G_BRCOND and G_BR have the same target here. Don't change anything. + G_BRCOND %cond(s1), %bb.1 + G_BR %bb.1 + bb.1: + RET_ReallyLR +... From 04cb6b5ea8bd2b52e3d11f4cb970fd2d144eee6a Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 8 Feb 2021 17:32:52 -0800 Subject: [PATCH 080/244] PR48587: is_constant_evaluated() should not evaluate to true during a variable's destruction if it didn't do so during construction. The standard doesn't give any guidance as to what to do here, but this approach seems reasonable and conservative, and has been proposed to the standard committee. (cherry picked from commit c945dc4a5023d6a17d11fcda76509b94b36e34fc) --- clang/lib/AST/ExprConstant.cpp | 19 +++- .../builtin-is-constant-evaluated.cpp | 92 +++++++++++++++++++ 2 files changed, 106 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index cd2b5141ebe8..1bdad771a923 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14792,11 +14792,14 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx, static bool EvaluateDestruction(const ASTContext &Ctx, APValue::LValueBase Base, APValue DestroyedValue, QualType Type, - SourceLocation Loc, Expr::EvalStatus &EStatus) { - EvalInfo Info(Ctx, EStatus, EvalInfo::EM_ConstantExpression); + SourceLocation Loc, Expr::EvalStatus &EStatus, + bool IsConstantDestruction) { + EvalInfo Info(Ctx, EStatus, + IsConstantDestruction ? 
EvalInfo::EM_ConstantExpression + : EvalInfo::EM_ConstantFold); Info.setEvaluatingDecl(Base, DestroyedValue, EvalInfo::EvaluatingDeclKind::Dtor); - Info.InConstantContext = true; + Info.InConstantContext = IsConstantDestruction; LValue LVal; LVal.set(Base); @@ -14850,7 +14853,8 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx, // If this is a class template argument, it's required to have constant // destruction too. if (Kind == ConstantExprKind::ClassTemplateArgument && - (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result) || + (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result, + true) || Result.HasSideEffects)) { // FIXME: Prefix a note to indicate that the problem is lack of constant // destruction. @@ -14916,6 +14920,10 @@ bool VarDecl::evaluateDestruction( Expr::EvalStatus EStatus; EStatus.Diag = &Notes; + // Only treat the destruction as constant destruction if we formally have + // constant initialization (or are usable in a constant expression). + bool IsConstantDestruction = hasConstantInitialization(); + // Make a copy of the value for the destructor to mutate, if we know it. // Otherwise, treat the value as default-initialized; if the destructor works // anyway, then the destruction is constant (and must be essentially empty). 
@@ -14926,7 +14934,8 @@ bool VarDecl::evaluateDestruction( return false; if (!EvaluateDestruction(getASTContext(), this, std::move(DestroyedValue), - getType(), getLocation(), EStatus) || + getType(), getLocation(), EStatus, + IsConstantDestruction) || EStatus.HasSideEffects) return false; diff --git a/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp b/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp index 967c83496ab9..d30fefe55b4f 100644 --- a/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp +++ b/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp @@ -4,6 +4,7 @@ // RUN: FileCheck -check-prefix=CHECK-DYN -input-file=%t.ll %s // RUN: FileCheck -check-prefix=CHECK-ARR -input-file=%t.ll %s // RUN: FileCheck -check-prefix=CHECK-FOLD -input-file=%t.ll %s +// RUN: FileCheck -check-prefix=CHECK-DTOR -input-file=%t.ll %s using size_t = decltype(sizeof(int)); @@ -131,3 +132,94 @@ void test_ref_to_static_var() { // CHECK-FOLD: store i32* @_ZZ22test_ref_to_static_varvE10i_constant, i32** %r, int &r = __builtin_is_constant_evaluated() ? i_constant : i_non_constant; } + +int not_constexpr; + +// __builtin_is_constant_evaluated() should never evaluate to true during +// destruction if it would not have done so during construction. +// +// FIXME: The standard doesn't say that it should ever return true when +// evaluating a destructor call, even for a constexpr variable. That seems +// obviously wrong. 
+struct DestructorBCE { + int n; + constexpr DestructorBCE(int n) : n(n) {} + constexpr ~DestructorBCE() { + if (!__builtin_is_constant_evaluated()) + not_constexpr = 1; + } +}; + +// CHECK-DTOR-NOT: @_ZN13DestructorBCED{{.*}}@global_dtor_bce_1 +DestructorBCE global_dtor_bce_1(101); + +// CHECK-DTOR: load i32, i32* @not_constexpr +// CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}} @global_dtor_bce_2, i32 +// CHECK-DTOR: atexit{{.*}} @_ZN13DestructorBCED{{.*}} @global_dtor_bce_2 +// CHECK-DTOR: } +DestructorBCE global_dtor_bce_2(not_constexpr); + +// CHECK-DTOR-NOT: @_ZN13DestructorBCED{{.*}}@global_dtor_bce_3 +constexpr DestructorBCE global_dtor_bce_3(103); + +// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_1v( +void test_dtor_bce_1() { + // Variable is neither constant initialized (because it has automatic storage + // duration) nor usable in constant expressions, so BCE should not return + // true during destruction. It would be OK if we replaced the constructor + // call with a direct store, but we should emit the destructor call. + + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}}, i32 201) + DestructorBCE local(201); + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_2v( +void test_dtor_bce_2() { + // Non-constant init => BCE is false in destructor. + + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}} + DestructorBCE local(not_constexpr); + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_3v( +void test_dtor_bce_3() { + // Should never call dtor for a constexpr variable. 
+ + // CHECK-DTOR-NOT: call {{.*}} @_ZN13DestructorBCEC1Ei( + constexpr DestructorBCE local(203); + // CHECK-DTOR-NOT: @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_1v( +void test_dtor_bce_static_1() { + // Variable is constant initialized, so BCE returns true during constant + // destruction. + + // CHECK: store i32 301 + // CHECK-DTOR-NOT: @_ZN13DestructorBCEC1Ei({{.*}} + static DestructorBCE local(301); + // CHECK-DTOR-NOT: @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_2v( +void test_dtor_bce_static_2() { + // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}} + static DestructorBCE local(not_constexpr); + // CHECK-DTOR: call {{.*}}atexit{{.*}} @_ZN13DestructorBCED + // CHECK-DTOR: } +} + +// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_3v( +void test_dtor_bce_static_3() { + // CHECK: store i32 303 + // CHECK-DTOR-NOT: @_ZN13DestructorBCEC1Ei({{.*}} + static constexpr DestructorBCE local(303); + // CHECK-DTOR-NOT: @_ZN13DestructorBCED + // CHECK-DTOR: } +} From 205ecd9b79c6915a85050246c961f167b494df43 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 9 Feb 2021 06:33:48 -0600 Subject: [PATCH 081/244] [DAGCombine] Do not remove masking argument to FP16_TO_FP for some targets As of commit 284f2bffc9bc5, the DAG Combiner gets rid of the masking of the input to this node if the mask only keeps the bottom 16 bits. This is because the underlying library function does not use the high order bits. However, on PowerPC's ELFv2 ABI, it is the caller that is responsible for clearing the bits from the register. Therefore, the library implementation of __gnu_h2f_ieee will return an incorrect result if the bits aren't cleared. This combine is desired for ARM (and possibly other targets) so this patch adds a query to Target Lowering to check if this zeroing needs to be kept. 
Fixes: https://bugs.llvm.org/show_bug.cgi?id=49092 Differential revision: https://reviews.llvm.org/D96283 (cherry picked from commit a5222aa0858a42660629c410a5b669dee16a4359) --- llvm/include/llvm/CodeGen/TargetLowering.h | 4 ++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 ++ .../PowerPC/handle-f16-storage-type.ll | 4 ++ llvm/test/CodeGen/PowerPC/pr48519.ll | 2 + llvm/test/CodeGen/PowerPC/pr49092.ll | 39 +++++++++++++++++++ 6 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr49092.ll diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index c3221aac8eea..40115fbd2f15 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2785,6 +2785,10 @@ class TargetLoweringBase { return false; } + /// Does this target require the clearing of high-order bits in a register + /// passed to the fp16 to fp conversion library function. 
+ virtual bool shouldKeepZExtForFP16Conv() const { return false; } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 615bea2a4905..89670d708264 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21174,7 +21174,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) - if (N0->getOpcode() == ISD::AND) { + if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) { ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); if (AndConst && AndConst->getAPIntValue() == 0xffff) { return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 477105bd03ac..0dda2c181572 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -987,6 +987,9 @@ namespace llvm { shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override; + // Keep the zero-extensions for arguments to libcalls. + bool shouldKeepZExtForFP16Conv() const override { return true; } + /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. 
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll index 9977b6b33560..ab19afa2beb5 100644 --- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll @@ -1156,6 +1156,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { ; P8-NEXT: xscvsxdsp f1, f0 ; P8-NEXT: bl __gnu_f2h_ieee ; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop ; P8-NEXT: xsaddsp f1, f31, f1 @@ -1175,6 +1176,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { ; CHECK-NEXT: xscvhpdp f0, f0 ; CHECK-NEXT: xscvdphp f1, f1 ; CHECK-NEXT: mffprwz r3, f1 +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f1, r3 ; CHECK-NEXT: xscvhpdp f1, f1 ; CHECK-NEXT: xsaddsp f1, f0, f1 @@ -1225,6 +1227,7 @@ define half @PR40273(half) #0 { ; P8-NEXT: stdu r1, -32(r1) ; P8-NEXT: bl __gnu_f2h_ieee ; P8-NEXT: nop +; P8-NEXT: clrldi r3, r3, 48 ; P8-NEXT: bl __gnu_h2f_ieee ; P8-NEXT: nop ; P8-NEXT: xxlxor f0, f0, f0 @@ -1245,6 +1248,7 @@ define half @PR40273(half) #0 { ; CHECK-NEXT: xscvdphp f0, f1 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 ; CHECK-NEXT: xscvhpdp f0, f0 ; CHECK-NEXT: fcmpu cr0, f0, f1 diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll index 50970cb185d8..035cc49b93e6 100644 --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -22,6 +22,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-NEXT: xscvsxdsp f1, f0 ; CHECK-NEXT: bl __gnu_f2h_ieee ; CHECK-NEXT: nop +; CHECK-NEXT: clrldi r3, r3, 48 ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: nop ; CHECK-NEXT: addi r30, r30, -1 @@ -46,6 +47,7 @@ define void @julia__typed_vcat_20() #0 { ; CHECK-P9-NEXT: xscvsxdsp f0, f0 ; CHECK-P9-NEXT: xscvdphp f0, f0 ; CHECK-P9-NEXT: 
mffprwz r3, f0 +; CHECK-P9-NEXT: clrlwi r3, r3, 16 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: xscvhpdp f0, f0 diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll new file mode 100644 index 000000000000..2fce58418515 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr49092.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=CHECK-P9 + +define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: addi r3, r3, 11 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P9-LABEL: test2: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: add r3, r4, r3 +; CHECK-P9-NEXT: addi r3, r3, 11 +; CHECK-P9-NEXT: clrlwi r3, r3, 16 +; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: xscvhpdp f1, f0 +; CHECK-P9-NEXT: blr +entry: + %add = add i64 %b, %a + %0 = trunc i64 %add to i16 + %conv = add i16 %0, 11 + %call = bitcast i16 %conv to half + ret half %call +} +attributes #0 = { nounwind } From 34cda01e235c549b56ffe30a7b09df0414d56ea0 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 2 Feb 2021 14:40:52 +0000 Subject: [PATCH 082/244] [RISCV] Fix incorrect RVV sdiv/udiv lowering Due to a clerical error, the sdiv operation was mapping to vdivu and udiv to vdiv, when the opposite mapping is the correct one. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D95869 (cherry picked from commit b4106f9c7b8c498d109301ced7bf9aca32027168) --- .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 4 +- .../CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll | 88 +++++++++---------- .../CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll | 88 +++++++++---------- .../CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll | 88 +++++++++---------- .../CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll | 88 +++++++++---------- 5 files changed, 178 insertions(+), 178 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 79a1e6ddc8a2..dee67708bed1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -384,8 +384,8 @@ defm "" : VPatBinarySDNode_VV_VX; defm "" : VPatBinarySDNode_VV_VX; // 12.11. Vector Integer Divide Instructions -defm "" : VPatBinarySDNode_VV_VX; -defm "" : VPatBinarySDNode_VV_VX; +defm "" : VPatBinarySDNode_VV_VX; +defm "" : VPatBinarySDNode_VV_VX; defm "" : VPatBinarySDNode_VV_VX; defm "" : VPatBinarySDNode_VV_VX; diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll index 239151274c4e..bbfc09d1c276 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll @@ -5,7 +5,7 @@ define @vdiv_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 +15,7 @@ define @vdiv_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -70,7 +70,7 @@ define @vdiv_vv_nxv2i8( %va, %va, %vb ret %vc @@ -80,7 +80,7 @@ define @vdiv_vx_nxv2i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -111,7 +111,7 @@ define @vdiv_vv_nxv4i8( %va, %va, %vb ret %vc @@ -121,7 +121,7 @@ define @vdiv_vx_nxv4i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -152,7 +152,7 @@ define @vdiv_vv_nxv8i8( %va, %va, %vb ret %vc @@ -162,7 +162,7 @@ define @vdiv_vx_nxv8i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -193,7 +193,7 @@ define @vdiv_vv_nxv16i8( %va, %va, %vb ret %vc @@ -203,7 +203,7 @@ define @vdiv_vx_nxv16i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -234,7 +234,7 @@ define @vdiv_vv_nxv32i8( %va, %va, %vb ret %vc @@ -244,7 +244,7 @@ define @vdiv_vx_nxv32i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -275,7 +275,7 @@ define @vdiv_vv_nxv64i8( %va, %va, %vb ret %vc @@ -285,7 +285,7 @@ define @vdiv_vx_nxv64i8( %va, i8 signext %b ; CHECK-LABEL: 
vdiv_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -316,7 +316,7 @@ define @vdiv_vv_nxv1i16( %va, %va, %vb ret %vc @@ -326,7 +326,7 @@ define @vdiv_vx_nxv1i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -357,7 +357,7 @@ define @vdiv_vv_nxv2i16( %va, %va, %vb ret %vc @@ -367,7 +367,7 @@ define @vdiv_vx_nxv2i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -398,7 +398,7 @@ define @vdiv_vv_nxv4i16( %va, %va, %vb ret %vc @@ -408,7 +408,7 @@ define @vdiv_vx_nxv4i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -439,7 +439,7 @@ define @vdiv_vv_nxv8i16( %va, %va, %vb ret %vc @@ -449,7 +449,7 @@ define @vdiv_vx_nxv8i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -480,7 +480,7 @@ define @vdiv_vv_nxv16i16( %va, %va, %vb ret %vc @@ -490,7 +490,7 @@ define 
@vdiv_vx_nxv16i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -521,7 +521,7 @@ define @vdiv_vv_nxv32i16( %va, %va, %vb ret %vc @@ -531,7 +531,7 @@ define @vdiv_vx_nxv32i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -562,7 +562,7 @@ define @vdiv_vv_nxv1i32( %va, %va, %vb ret %vc @@ -572,7 +572,7 @@ define @vdiv_vx_nxv1i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -603,7 +603,7 @@ define @vdiv_vv_nxv2i32( %va, %va, %vb ret %vc @@ -613,7 +613,7 @@ define @vdiv_vx_nxv2i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -644,7 +644,7 @@ define @vdiv_vv_nxv4i32( %va, %va, %vb ret %vc @@ -654,7 +654,7 @@ define @vdiv_vx_nxv4i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -685,7 +685,7 @@ define @vdiv_vv_nxv8i32( %va, %va, %vb 
ret %vc @@ -695,7 +695,7 @@ define @vdiv_vx_nxv8i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -726,7 +726,7 @@ define @vdiv_vv_nxv16i32( %va, %va, %vb ret %vc @@ -736,7 +736,7 @@ define @vdiv_vx_nxv16i32( %va, i32 %b) { ; CHECK-LABEL: vdiv_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -767,7 +767,7 @@ define @vdiv_vv_nxv1i64( %va, %va, %vb ret %vc @@ -784,7 +784,7 @@ define @vdiv_vx_nxv1i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v26, v26, a1 ; CHECK-NEXT: vsrl.vx v26, v26, a1 ; CHECK-NEXT: vor.vv v25, v26, v25 -; CHECK-NEXT: vdivu.vv v8, v8, v25 +; CHECK-NEXT: vdiv.vv v8, v8, v25 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -825,7 +825,7 @@ define @vdiv_vv_nxv2i64( %va, %va, %vb ret %vc @@ -842,7 +842,7 @@ define @vdiv_vx_nxv2i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v28, v28, a1 ; CHECK-NEXT: vsrl.vx v28, v28, a1 ; CHECK-NEXT: vor.vv v26, v28, v26 -; CHECK-NEXT: vdivu.vv v8, v8, v26 +; CHECK-NEXT: vdiv.vv v8, v8, v26 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -883,7 +883,7 @@ define @vdiv_vv_nxv4i64( %va, %va, %vb ret %vc @@ -900,7 +900,7 @@ define @vdiv_vx_nxv4i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v12, v12, a1 ; CHECK-NEXT: vsrl.vx v12, v12, a1 ; CHECK-NEXT: vor.vv v28, v12, v28 -; CHECK-NEXT: vdivu.vv v8, v8, v28 +; CHECK-NEXT: vdiv.vv v8, v8, v28 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, 
zeroinitializer @@ -941,7 +941,7 @@ define @vdiv_vv_nxv8i64( %va, %va, %vb ret %vc @@ -958,7 +958,7 @@ define @vdiv_vx_nxv8i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v24, v24, a1 ; CHECK-NEXT: vsrl.vx v24, v24, a1 ; CHECK-NEXT: vor.vv v16, v24, v16 -; CHECK-NEXT: vdivu.vv v8, v8, v16 +; CHECK-NEXT: vdiv.vv v8, v8, v16 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll index 991cccf72cdd..b8f331e78b5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll @@ -5,7 +5,7 @@ define @vdiv_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 +15,7 @@ define @vdiv_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -46,7 +46,7 @@ define @vdiv_vv_nxv2i8( %va, %va, %vb ret %vc @@ -56,7 +56,7 @@ define @vdiv_vx_nxv2i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -87,7 +87,7 @@ define @vdiv_vv_nxv4i8( %va, %va, %vb ret %vc @@ -97,7 +97,7 @@ define @vdiv_vx_nxv4i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -128,7 +128,7 @@ define @vdiv_vv_nxv8i8( %va, %va, %vb ret %vc @@ -138,7 +138,7 @@ define 
@vdiv_vx_nxv8i8( %va, i8 signext %b) { ; CHECK-LABEL: vdiv_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -169,7 +169,7 @@ define @vdiv_vv_nxv16i8( %va, %va, %vb ret %vc @@ -179,7 +179,7 @@ define @vdiv_vx_nxv16i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -210,7 +210,7 @@ define @vdiv_vv_nxv32i8( %va, %va, %vb ret %vc @@ -220,7 +220,7 @@ define @vdiv_vx_nxv32i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -251,7 +251,7 @@ define @vdiv_vv_nxv64i8( %va, %va, %vb ret %vc @@ -261,7 +261,7 @@ define @vdiv_vx_nxv64i8( %va, i8 signext %b ; CHECK-LABEL: vdiv_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -292,7 +292,7 @@ define @vdiv_vv_nxv1i16( %va, %va, %vb ret %vc @@ -302,7 +302,7 @@ define @vdiv_vx_nxv1i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -333,7 +333,7 @@ define @vdiv_vv_nxv2i16( %va, %va, 
%vb ret %vc @@ -343,7 +343,7 @@ define @vdiv_vx_nxv2i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -374,7 +374,7 @@ define @vdiv_vv_nxv4i16( %va, %va, %vb ret %vc @@ -384,7 +384,7 @@ define @vdiv_vx_nxv4i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -415,7 +415,7 @@ define @vdiv_vv_nxv8i16( %va, %va, %vb ret %vc @@ -425,7 +425,7 @@ define @vdiv_vx_nxv8i16( %va, i16 signext % ; CHECK-LABEL: vdiv_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -456,7 +456,7 @@ define @vdiv_vv_nxv16i16( %va, %va, %vb ret %vc @@ -466,7 +466,7 @@ define @vdiv_vx_nxv16i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -497,7 +497,7 @@ define @vdiv_vv_nxv32i16( %va, %va, %vb ret %vc @@ -507,7 +507,7 @@ define @vdiv_vx_nxv32i16( %va, i16 signex ; CHECK-LABEL: vdiv_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ 
-538,7 +538,7 @@ define @vdiv_vv_nxv1i32( %va, %va, %vb ret %vc @@ -548,7 +548,7 @@ define @vdiv_vx_nxv1i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -580,7 +580,7 @@ define @vdiv_vv_nxv2i32( %va, %va, %vb ret %vc @@ -590,7 +590,7 @@ define @vdiv_vx_nxv2i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -622,7 +622,7 @@ define @vdiv_vv_nxv4i32( %va, %va, %vb ret %vc @@ -632,7 +632,7 @@ define @vdiv_vx_nxv4i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -664,7 +664,7 @@ define @vdiv_vv_nxv8i32( %va, %va, %vb ret %vc @@ -674,7 +674,7 @@ define @vdiv_vx_nxv8i32( %va, i32 signext % ; CHECK-LABEL: vdiv_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -706,7 +706,7 @@ define @vdiv_vv_nxv16i32( %va, %va, %vb ret %vc @@ -716,7 +716,7 @@ define @vdiv_vx_nxv16i32( %va, i32 signex ; CHECK-LABEL: vdiv_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = 
shufflevector %head, undef, zeroinitializer @@ -748,7 +748,7 @@ define @vdiv_vv_nxv1i64( %va, %va, %vb ret %vc @@ -758,7 +758,7 @@ define @vdiv_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv1i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -796,7 +796,7 @@ define @vdiv_vv_nxv2i64( %va, %va, %vb ret %vc @@ -806,7 +806,7 @@ define @vdiv_vx_nxv2i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -844,7 +844,7 @@ define @vdiv_vv_nxv4i64( %va, %va, %vb ret %vc @@ -854,7 +854,7 @@ define @vdiv_vx_nxv4i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -892,7 +892,7 @@ define @vdiv_vv_nxv8i64( %va, %va, %vb ret %vc @@ -902,7 +902,7 @@ define @vdiv_vx_nxv8i64( %va, i64 %b) { ; CHECK-LABEL: vdiv_vx_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu -; CHECK-NEXT: vdivu.vx v8, v8, a0 +; CHECK-NEXT: vdiv.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll index 27b27cd64bae..383d3f380fe8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll @@ -5,7 +5,7 @@ define @vdivu_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 
+15,7 @@ define @vdivu_vx_nxv1i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -68,7 +68,7 @@ define @vdivu_vv_nxv2i8( %va, %va, %vb ret %vc @@ -78,7 +78,7 @@ define @vdivu_vx_nxv2i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -107,7 +107,7 @@ define @vdivu_vv_nxv4i8( %va, %va, %vb ret %vc @@ -117,7 +117,7 @@ define @vdivu_vx_nxv4i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -146,7 +146,7 @@ define @vdivu_vv_nxv8i8( %va, %va, %vb ret %vc @@ -156,7 +156,7 @@ define @vdivu_vx_nxv8i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -185,7 +185,7 @@ define @vdivu_vv_nxv16i8( %va, %va, %vb ret %vc @@ -195,7 +195,7 @@ define @vdivu_vx_nxv16i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -224,7 +224,7 @@ define 
@vdivu_vv_nxv32i8( %va, %va, %vb ret %vc @@ -234,7 +234,7 @@ define @vdivu_vx_nxv32i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -263,7 +263,7 @@ define @vdivu_vv_nxv64i8( %va, %va, %vb ret %vc @@ -273,7 +273,7 @@ define @vdivu_vx_nxv64i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -302,7 +302,7 @@ define @vdivu_vv_nxv1i16( %va, %va, %vb ret %vc @@ -312,7 +312,7 @@ define @vdivu_vx_nxv1i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -342,7 +342,7 @@ define @vdivu_vv_nxv2i16( %va, %va, %vb ret %vc @@ -352,7 +352,7 @@ define @vdivu_vx_nxv2i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -382,7 +382,7 @@ define @vdivu_vv_nxv4i16( %va, %va, %vb ret %vc @@ -392,7 +392,7 @@ define @vdivu_vx_nxv4i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, 
undef, zeroinitializer @@ -422,7 +422,7 @@ define @vdivu_vv_nxv8i16( %va, %va, %vb ret %vc @@ -432,7 +432,7 @@ define @vdivu_vx_nxv8i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -462,7 +462,7 @@ define @vdivu_vv_nxv16i16( %va, %va, %vb ret %vc @@ -472,7 +472,7 @@ define @vdivu_vx_nxv16i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -502,7 +502,7 @@ define @vdivu_vv_nxv32i16( %va, %va, %vb ret %vc @@ -512,7 +512,7 @@ define @vdivu_vx_nxv32i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -542,7 +542,7 @@ define @vdivu_vv_nxv1i32( %va, %va, %vb ret %vc @@ -552,7 +552,7 @@ define @vdivu_vx_nxv1i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -582,7 +582,7 @@ define @vdivu_vv_nxv2i32( %va, %va, %vb ret %vc @@ -592,7 +592,7 @@ define @vdivu_vx_nxv2i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement 
undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -622,7 +622,7 @@ define @vdivu_vv_nxv4i32( %va, %va, %vb ret %vc @@ -632,7 +632,7 @@ define @vdivu_vx_nxv4i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -662,7 +662,7 @@ define @vdivu_vv_nxv8i32( %va, %va, %vb ret %vc @@ -672,7 +672,7 @@ define @vdivu_vx_nxv8i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -702,7 +702,7 @@ define @vdivu_vv_nxv16i32( %va, %va, %vb ret %vc @@ -712,7 +712,7 @@ define @vdivu_vx_nxv16i32( %va, i32 %b) { ; CHECK-LABEL: vdivu_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -742,7 +742,7 @@ define @vdivu_vv_nxv1i64( %va, %va, %vb ret %vc @@ -759,7 +759,7 @@ define @vdivu_vx_nxv1i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v26, v26, a1 ; CHECK-NEXT: vsrl.vx v26, v26, a1 ; CHECK-NEXT: vor.vv v25, v26, v25 -; CHECK-NEXT: vdiv.vv v8, v8, v25 +; CHECK-NEXT: vdivu.vv v8, v8, v25 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -796,7 +796,7 @@ define @vdivu_vv_nxv2i64( %va, %va, %vb ret %vc @@ -813,7 +813,7 @@ define @vdivu_vx_nxv2i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v28, v28, a1 ; CHECK-NEXT: vsrl.vx v28, v28, a1 ; CHECK-NEXT: vor.vv v26, v28, v26 -; CHECK-NEXT: vdiv.vv v8, v8, v26 +; 
CHECK-NEXT: vdivu.vv v8, v8, v26 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -850,7 +850,7 @@ define @vdivu_vv_nxv4i64( %va, %va, %vb ret %vc @@ -867,7 +867,7 @@ define @vdivu_vx_nxv4i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v12, v12, a1 ; CHECK-NEXT: vsrl.vx v12, v12, a1 ; CHECK-NEXT: vor.vv v28, v12, v28 -; CHECK-NEXT: vdiv.vv v8, v8, v28 +; CHECK-NEXT: vdivu.vv v8, v8, v28 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -904,7 +904,7 @@ define @vdivu_vv_nxv8i64( %va, %va, %vb ret %vc @@ -921,7 +921,7 @@ define @vdivu_vx_nxv8i64( %va, i64 %b) { ; CHECK-NEXT: vsll.vx v24, v24, a1 ; CHECK-NEXT: vsrl.vx v24, v24, a1 ; CHECK-NEXT: vor.vv v16, v24, v16 -; CHECK-NEXT: vdiv.vv v8, v8, v16 +; CHECK-NEXT: vdivu.vv v8, v8, v16 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll index 70cd4fba1eb7..bc72099d75eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll @@ -5,7 +5,7 @@ define @vdivu_vv_nxv1i8( %va, %va, %vb ret %vc @@ -15,7 +15,7 @@ define @vdivu_vx_nxv1i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -44,7 +44,7 @@ define @vdivu_vv_nxv2i8( %va, %va, %vb ret %vc @@ -54,7 +54,7 @@ define @vdivu_vx_nxv2i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, 
i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -83,7 +83,7 @@ define @vdivu_vv_nxv4i8( %va, %va, %vb ret %vc @@ -93,7 +93,7 @@ define @vdivu_vx_nxv4i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -122,7 +122,7 @@ define @vdivu_vv_nxv8i8( %va, %va, %vb ret %vc @@ -132,7 +132,7 @@ define @vdivu_vx_nxv8i8( %va, i8 signext %b) ; CHECK-LABEL: vdivu_vx_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -161,7 +161,7 @@ define @vdivu_vv_nxv16i8( %va, %va, %vb ret %vc @@ -171,7 +171,7 @@ define @vdivu_vx_nxv16i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -200,7 +200,7 @@ define @vdivu_vv_nxv32i8( %va, %va, %vb ret %vc @@ -210,7 +210,7 @@ define @vdivu_vx_nxv32i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -239,7 +239,7 @@ define @vdivu_vv_nxv64i8( %va, %va, %vb ret %vc @@ -249,7 +249,7 @@ define @vdivu_vx_nxv64i8( %va, i8 signext % ; CHECK-LABEL: vdivu_vx_nxv64i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: 
ret %head = insertelement undef, i8 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -278,7 +278,7 @@ define @vdivu_vv_nxv1i16( %va, %va, %vb ret %vc @@ -288,7 +288,7 @@ define @vdivu_vx_nxv1i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -318,7 +318,7 @@ define @vdivu_vv_nxv2i16( %va, %va, %vb ret %vc @@ -328,7 +328,7 @@ define @vdivu_vx_nxv2i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -358,7 +358,7 @@ define @vdivu_vv_nxv4i16( %va, %va, %vb ret %vc @@ -368,7 +368,7 @@ define @vdivu_vx_nxv4i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -398,7 +398,7 @@ define @vdivu_vv_nxv8i16( %va, %va, %vb ret %vc @@ -408,7 +408,7 @@ define @vdivu_vx_nxv8i16( %va, i16 signext ; CHECK-LABEL: vdivu_vx_nxv8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -438,7 +438,7 @@ define @vdivu_vv_nxv16i16( %va, %va, %vb ret %vc @@ -448,7 +448,7 @@ define @vdivu_vx_nxv16i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, 
a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -478,7 +478,7 @@ define @vdivu_vv_nxv32i16( %va, %va, %vb ret %vc @@ -488,7 +488,7 @@ define @vdivu_vx_nxv32i16( %va, i16 signe ; CHECK-LABEL: vdivu_vx_nxv32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i16 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -518,7 +518,7 @@ define @vdivu_vv_nxv1i32( %va, %va, %vb ret %vc @@ -528,7 +528,7 @@ define @vdivu_vx_nxv1i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,mf2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -558,7 +558,7 @@ define @vdivu_vv_nxv2i32( %va, %va, %vb ret %vc @@ -568,7 +568,7 @@ define @vdivu_vx_nxv2i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -598,7 +598,7 @@ define @vdivu_vv_nxv4i32( %va, %va, %vb ret %vc @@ -608,7 +608,7 @@ define @vdivu_vx_nxv4i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -638,7 +638,7 @@ define @vdivu_vv_nxv8i32( %va, %va, %vb ret %vc @@ -648,7 +648,7 @@ define @vdivu_vx_nxv8i32( %va, i32 signext ; CHECK-LABEL: vdivu_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
a1, zero, e32,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -678,7 +678,7 @@ define @vdivu_vv_nxv16i32( %va, %va, %vb ret %vc @@ -688,7 +688,7 @@ define @vdivu_vx_nxv16i32( %va, i32 signe ; CHECK-LABEL: vdivu_vx_nxv16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i32 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -718,7 +718,7 @@ define @vdivu_vv_nxv1i64( %va, %va, %vb ret %vc @@ -728,7 +728,7 @@ define @vdivu_vx_nxv1i64( %va, i64 %b) { ; CHECK-LABEL: vdivu_vx_nxv1i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -760,7 +760,7 @@ define @vdivu_vv_nxv2i64( %va, %va, %vb ret %vc @@ -770,7 +770,7 @@ define @vdivu_vx_nxv2i64( %va, i64 %b) { ; CHECK-LABEL: vdivu_vx_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -802,7 +802,7 @@ define @vdivu_vv_nxv4i64( %va, %va, %vb ret %vc @@ -812,7 +812,7 @@ define @vdivu_vx_nxv4i64( %va, i64 %b) { ; CHECK-LABEL: vdivu_vx_nxv4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -844,7 +844,7 @@ define @vdivu_vv_nxv8i64( %va, %va, %vb ret %vc @@ -854,7 +854,7 @@ define @vdivu_vx_nxv8i64( %va, i64 %b) { ; CHECK-LABEL: 
vdivu_vx_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu -; CHECK-NEXT: vdiv.vx v8, v8, a0 +; CHECK-NEXT: vdivu.vx v8, v8, a0 ; CHECK-NEXT: ret %head = insertelement undef, i64 %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer From 2cf21fd6a5b4a6f0f0da55717a787fc38202cca8 Mon Sep 17 00:00:00 2001 From: Joachim Meyer Date: Thu, 17 Dec 2020 23:58:13 +0100 Subject: [PATCH 083/244] [Support] Indent multi-line descr of enum cli options. As noted in https://reviews.llvm.org/D93459, the formatting of multi-line descriptions of clEnumValN and the likes is unfavorable. Thus this patch adds support for correctly indenting these. Reviewed By: serge-sans-paille Differential Revision: https://reviews.llvm.org/D93494 (cherry picked from commit e3f02302e318837d2421c6425450f04ae0a82b90) --- llvm/include/llvm/Support/CommandLine.h | 13 +++++++++++ llvm/lib/Support/CommandLine.cpp | 25 ++++++++++++++++------ llvm/unittests/Support/CommandLineTest.cpp | 22 +++++++++++++++++++ 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 38f3e188be55..0706aa226c0e 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -369,9 +369,22 @@ class Option { virtual void setDefault() = 0; + // Prints the help string for an option. + // + // This maintains the Indent for multi-line descriptions. + // FirstLineIndentedBy is the count of chars of the first line + // i.e. the one containing the --