From 7051e794ceb6399429ab1b961a13e6876ea93943 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 27 Jan 2021 15:21:13 -0800
Subject: [PATCH 001/244] Drop the 'git' suffix from various version variables

---
 libcxx/CMakeLists.txt    | 2 +-
 libcxxabi/CMakeLists.txt | 2 +-
 libunwind/CMakeLists.txt | 2 +-
 llvm/CMakeLists.txt      | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 4e7e8f978546..9bf1a02f0908 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -29,7 +29,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL
   project(libcxx CXX C)
 
   set(PACKAGE_NAME libcxx)
-  set(PACKAGE_VERSION 12.0.0git)
+  set(PACKAGE_VERSION 12.0.0)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index b803347c2a8e..426c855288fc 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -28,7 +28,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXXABI_STANDALONE_B
   project(libcxxabi CXX C)
 
   set(PACKAGE_NAME libcxxabi)
-  set(PACKAGE_VERSION 11.0.0git)
+  set(PACKAGE_VERSION 11.0.0)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index 8ae32fbccf4e..48cb8e004e08 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -24,7 +24,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B
   project(libunwind LANGUAGES C CXX ASM)
 
   set(PACKAGE_NAME libunwind)
-  set(PACKAGE_VERSION 12.0.0git)
+  set(PACKAGE_VERSION 12.0.0)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 454ec561af9a..277d0fe54d7b 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -14,7 +14,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
-  set(LLVM_VERSION_SUFFIX git)
+  set(LLVM_VERSION_SUFFIX "")
 endif()
 
 if (NOT PACKAGE_VERSION)

From f2a45d31b9c11f2b3e12f161391fe845025b5177 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 27 Jan 2021 15:17:48 -0800
Subject: [PATCH 002/244] Import workflows from release/11.x branch

---
 .github/workflows/clang-tests.yml  |  43 +++++++++++
 .github/workflows/libclc-tests.yml |  53 +++++++++++++
 .github/workflows/lld-tests.yml    |  43 +++++++++++
 .github/workflows/lldb-tests.yml   |  48 ++++++++++++
 .github/workflows/llvm-tests.yml   | 116 +++++++++++++++++++++++++++++
 5 files changed, 303 insertions(+)
 create mode 100644 .github/workflows/clang-tests.yml
 create mode 100644 .github/workflows/libclc-tests.yml
 create mode 100644 .github/workflows/lld-tests.yml
 create mode 100644 .github/workflows/lldb-tests.yml
 create mode 100644 .github/workflows/llvm-tests.yml

diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml
new file mode 100644
index 000000000000..f8ca65e10726
--- /dev/null
+++ b/.github/workflows/clang-tests.yml
@@ -0,0 +1,43 @@
+name: Clang Tests
+
+on:
+  push:
+    branches:
+      - 'release/**'
+    paths:
+      - 'clang/**'
+      - 'llvm/**'
+      - '.github/workflows/clang-tests.yml'
+  pull_request:
+    paths:
+      - 'clang/**'
+      - 'llvm/**'
+      - '.github/workflows/clang-tests.yml'
+
+jobs:
+  build_clang:
+    name: clang check-all
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macOS-latest
+    steps:
+    - name: Setup Windows
+      if: startsWith(matrix.os, 'windows')
+      uses: llvm/actions/setup-windows@master
+      with:
+        arch: amd64
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@master
+    - uses: actions/checkout@v1
+      with:
+        fetch-depth: 1
+    - name: Test clang
+      uses: llvm/actions/build-test-llvm-project@master
+      with:
+        cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release
+        build_target: check-clang
diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml
new file mode 100644
index 000000000000..4e8639b1c89a
--- /dev/null
+++ b/.github/workflows/libclc-tests.yml
@@ -0,0 +1,53 @@
+name: libclc Tests
+
+on:
+  push:
+    branches:
+      - 'release/**'
+    paths:
+      - 'clang/**'
+      - 'llvm/**'
+      - 'libclc/**'
+      - '.github/workflows/libclc-tests.yml'
+  pull_request:
+    paths:
+      - 'clang/**'
+      - 'llvm/**'
+      - 'libclc/**'
+      - '.github/workflows/libclc-tests.yml'
+
+jobs:
+  build_libclc:
+    name: libclc test
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          # Disable build on windows, because I can't figure out where llvm-config is.
+          #- windows-latest
+          - macOS-latest
+    steps:
+    - name: Setup Windows
+      if: startsWith(matrix.os, 'windows')
+      uses: llvm/actions/setup-windows@master
+      with:
+        arch: amd64
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@master
+    - uses: actions/checkout@v1
+      with:
+        fetch-depth: 1
+    - name: Build clang
+      uses: llvm/actions/build-test-llvm-project@master
+      with:
+        cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release
+        build_target: ""
+    - name: Build and test libclc
+      run: |
+        mkdir libclc-build
+        cd libclc-build
+        cmake -G Ninja ../libclc -DLLVM_CONFIG=../build/bin/llvm-config
+        ninja
+        ninja test
diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml
new file mode 100644
index 000000000000..9b4cbe95f231
--- /dev/null
+++ b/.github/workflows/lld-tests.yml
@@ -0,0 +1,43 @@
+name: LLD Tests
+
+on:
+  push:
+    branches:
+      - 'release/**'
+    paths:
+      - 'lld/**'
+      - 'llvm/**'
+      - '.github/workflows/lld-tests.yml'
+  pull_request:
+    paths:
+      - 'lld/**'
+      - 'llvm/**'
+      - '.github/workflows/lld-tests.yml'
+
+jobs:
+  build_lld:
+    name: lld check-all
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macOS-latest
+    steps:
+    - name: Setup Windows
+      if: startsWith(matrix.os, 'windows')
+      uses: llvm/actions/setup-windows@master
+      with:
+        arch: amd64
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@master
+    - uses: actions/checkout@v1
+      with:
+        fetch-depth: 1
+    - name: Test lld
+      uses: llvm/actions/build-test-llvm-project@master
+      with:
+        cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="lld" -DCMAKE_BUILD_TYPE=Release
+        build_target: check-lld
diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml
new file mode 100644
index 000000000000..229e6deece6e
--- /dev/null
+++ b/.github/workflows/lldb-tests.yml
@@ -0,0 +1,48 @@
+name: lldb Tests
+
+on:
+  push:
+    branches:
+      - 'release/**'
+    paths:
+      - 'clang/**'
+      - 'llvm/**'
+      - 'lldb/**'
+      - '.github/workflows/lldb-tests.yml'
+  pull_request:
+    paths:
+      - 'clang/**'
+      - 'llvm/**'
+      - 'lldb/**'
+      - '.github/workflows/lldb-tests.yml'
+
+jobs:
+  build_lldb:
+    name: lldb build
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - windows-latest
+          # macOS build disabled due to: llvm.org/PR46190
+          #- macOS-latest
+    steps:
+    - name: Setup Windows
+      if: startsWith(matrix.os, 'windows')
+      uses: llvm/actions/setup-windows@master
+      with:
+        arch: amd64
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@master
+    - uses: actions/checkout@v1
+      with:
+        fetch-depth: 1
+    - name: Build lldb
+      uses: llvm/actions/build-test-llvm-project@master
+      with:
+        # Mac OS requires that libcxx is enabled for lldb tests, so we need to disable them.
+        cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="clang;lldb" -DCMAKE_BUILD_TYPE=Release -DLLDB_INCLUDE_TESTS=OFF
+        # check-lldb is not consistent, so we only build lldb.
+        build_target: ""
diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
new file mode 100644
index 000000000000..67f318ad849f
--- /dev/null
+++ b/.github/workflows/llvm-tests.yml
@@ -0,0 +1,116 @@
+name: LLVM Tests
+
+env:
+  release_major: 12
+
+on:
+  push:
+    branches:
+      - 'release/**'
+    paths:
+      - 'llvm/**'
+      - '.github/workflows/llvm-tests.yml'
+  pull_request:
+    paths:
+      - 'llvm/**'
+      - '.github/workflows/llvm-tests.yml'
+
+jobs:
+  build_llvm:
+    name: llvm check-all
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macOS-latest
+    steps:
+    - name: Setup Windows
+      if: startsWith(matrix.os, 'windows')
+      uses: llvm/actions/setup-windows@master
+      with:
+        arch: amd64
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@master
+    - uses: actions/checkout@v1
+      with:
+        fetch-depth: 1
+    - name: Test llvm
+      uses: llvm/actions/build-test-llvm-project@master
+      with:
+        cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release
+
+  abi-dump:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        name:
+          - build-baseline
+          - build-latest
+        include:
+          - name: build-baseline
+            # FIXME: Referencing the env context does not work here
+            # ref: llvmorg-${{ env.release_major }}.0.0
+            ref: llvmorg-12.0.0
+            repo: llvm/llvm-project
+          - name: build-latest
+            ref: ${{ github.sha }}
+            repo: ${{ github.repository }}
+    steps:
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@master
+    - name: Install abi-compliance-checker
+      run: |
+        sudo apt-get install abi-dumper autoconf pkg-config
+    - name: Install universal-ctags
+      run: |
+        git clone https://github.com/universal-ctags/ctags.git
+        cd ctags
+        ./autogen.sh
+        ./configure
+        sudo make install
+    - name: Download source code
+      uses: llvm/actions/get-llvm-project-src@master
+      with:
+        ref: ${{ matrix.ref }}
+        repo: ${{ matrix.repo }}
+    - name: Configure
+      run: |
+        mkdir build
+        cd build
+        cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" ../llvm
+    - name: Build
+      run: ninja -C build libLLVM-${{ env.release_major }}.so
+    - name: Dump ABI
+      run: abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers llvm/include -o ${{ matrix.ref }}.abi.tar.gz build/lib/libLLVM-${{ env.release_major }}.so
+    - name: Upload ABI file
+      uses: actions/upload-artifact@v1
+      with:
+        name: ${{ matrix.name }}
+        path: ${{ matrix.ref }}.abi.tar.gz
+
+  abi-compare:
+    runs-on: ubuntu-latest
+    needs:
+      - abi-dump
+    steps:
+      - name: Download baseline
+        uses: actions/download-artifact@v1
+        with:
+          name: build-baseline
+      - name: Download latest
+        uses: actions/download-artifact@v1
+        with:
+          name: build-latest
+      - name: Install abi-compliance-checker
+        run: sudo apt-get install abi-compliance-checker
+      - name: Compare ABI
+        run: abi-compliance-checker -l libLLVM-${{ env.release_major}}.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz
+      - name: Upload ABI Comparison
+        if: always()
+        uses: actions/upload-artifact@v1
+        with:
+          name: compat-report-${{ github.sha }}
+          path: compat_reports/

From d64226e8fab8fc7b4d947223c61036a60eb6a871 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Wed, 27 Jan 2021 15:32:05 +0100
Subject: [PATCH 003/244] [clangd] Work around GCC bug 66735

(cherry picked from commit 12de8e1399fecf691639ba430b3824acb1311e70)
---
 clang-tools-extra/clangd/ParsedAST.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 403d3fe3e64f..1020282f5ee8 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -316,8 +316,8 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
       Check->registerMatchers(&CTFinder);
     }
 
-    ASTDiags.setLevelAdjuster([&, &Cfg(Config::current())](
-                                  DiagnosticsEngine::Level DiagLevel,
+    const Config& Cfg = Config::current();
+    ASTDiags.setLevelAdjuster([&](DiagnosticsEngine::Level DiagLevel,
                                   const clang::Diagnostic &Info) {
       if (Cfg.Diagnostics.SuppressAll ||
           isBuiltinDiagnosticSuppressed(Info.getID(), Cfg.Diagnostics.Suppress))

From ea99c885a63de9af673a5e5cd51f44fb70c83c1b Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Wed, 27 Jan 2021 12:24:30 -0800
Subject: [PATCH 004/244] Permit __VA_OPT__ in all language modes and allow it
 to be detected with #ifdef.

These changes are intended to give code a path to move away from the GNU
,##__VA_ARGS__ extension, which is non-conforming in some situations and
which we'd like to disable in our conforming mode in those cases.

(cherry picked from commit 0436ec2128c9775ba13b0308937238fc79673fdd)
---
 clang/include/clang/Lex/Preprocessor.h        | 19 ++++++++++++
 .../include/clang/Lex/VariadicMacroSupport.h  | 10 ++----
 clang/lib/Lex/PPDirectives.cpp                |  5 +++
 clang/lib/Lex/PPExpressions.cpp               |  5 +++
 clang/lib/Lex/PPMacroExpansion.cpp            |  6 +++-
 clang/lib/Lex/Preprocessor.cpp                | 19 +++++-------
 clang/test/Preprocessor/macro_vaopt_check.cpp | 31 ++++++++++++++++++-
 .../test/Preprocessor/macro_vaopt_expand.cpp  |  4 ++-
 8 files changed, 78 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 68139cb24b31..ba8bdaa23c4c 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -447,6 +447,25 @@ class Preprocessor {
           ElseLoc(ElseLoc) {}
   };
 
+  class IfdefMacroNameScopeRAII {
+    Preprocessor &PP;
+    bool VAOPTWasPoisoned;
+
+  public:
+    IfdefMacroNameScopeRAII(Preprocessor &PP)
+        : PP(PP), VAOPTWasPoisoned(PP.Ident__VA_OPT__->isPoisoned()) {
+      PP.Ident__VA_OPT__->setIsPoisoned(false);
+    }
+    IfdefMacroNameScopeRAII(const IfdefMacroNameScopeRAII&) = delete;
+    IfdefMacroNameScopeRAII &operator=(const IfdefMacroNameScopeRAII&) = delete;
+    ~IfdefMacroNameScopeRAII() { Exit(); }
+
+    void Exit() {
+      if (VAOPTWasPoisoned)
+        PP.Ident__VA_OPT__->setIsPoisoned(true);
+    }
+  };
+
 private:
   friend class ASTReader;
   friend class MacroArgs;
diff --git a/clang/include/clang/Lex/VariadicMacroSupport.h b/clang/include/clang/Lex/VariadicMacroSupport.h
index 989e0ac703c9..119f02201fc6 100644
--- a/clang/include/clang/Lex/VariadicMacroSupport.h
+++ b/clang/include/clang/Lex/VariadicMacroSupport.h
@@ -39,17 +39,14 @@ namespace clang {
       assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned "
                                               "outside an ISO C/C++ variadic "
                                               "macro definition!");
-      assert(
-          !Ident__VA_OPT__ ||
-          (Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!"));
+      assert(Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!");
     }
 
     /// Client code should call this function just before the Preprocessor is
     /// about to Lex tokens from the definition of a variadic (ISO C/C++) macro.
     void enterScope() {
       Ident__VA_ARGS__->setIsPoisoned(false);
-      if (Ident__VA_OPT__)
-        Ident__VA_OPT__->setIsPoisoned(false);
+      Ident__VA_OPT__->setIsPoisoned(false);
     }
 
     /// Client code should call this function as soon as the Preprocessor has
@@ -58,8 +55,7 @@ namespace clang {
     /// (might be explicitly called, and then reinvoked via the destructor).
     void exitScope() {
       Ident__VA_ARGS__->setIsPoisoned(true);
-      if (Ident__VA_OPT__)
-        Ident__VA_OPT__->setIsPoisoned(true);
+      Ident__VA_OPT__->setIsPoisoned(true);
     }
 
     ~VariadicMacroScopeGuard() { exitScope(); }
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index d6b03d85913d..e2aa93455ea5 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2928,9 +2928,14 @@ void Preprocessor::HandleIfdefDirective(Token &Result,
   ++NumIf;
   Token DirectiveTok = Result;
 
+  // __VA_OPT__ is allowed as the operand of #if[n]def.
+  IfdefMacroNameScopeRAII IfdefMacroNameScope(*this);
+
   Token MacroNameTok;
   ReadMacroName(MacroNameTok);
 
+  IfdefMacroNameScope.Exit();
+
   // Error reading macro name?  If so, diagnostic already issued.
   if (MacroNameTok.is(tok::eod)) {
     // Skip code until we get to #endif.  This helps with recovery by not
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 8c120c13d7d2..952fb8f121dc 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -104,6 +104,9 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   SourceLocation beginLoc(PeekTok.getLocation());
   Result.setBegin(beginLoc);
 
+  // __VA_OPT__ is allowed as the operand of 'defined'.
+  Preprocessor::IfdefMacroNameScopeRAII IfdefMacroNameScope(PP);
+
   // Get the next token, don't expand it.
   PP.LexUnexpandedNonComment(PeekTok);
 
@@ -122,6 +125,8 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     PP.LexUnexpandedNonComment(PeekTok);
   }
 
+  IfdefMacroNameScope.Exit();
+
   // If we don't have a pp-identifier now, this is an error.
   if (PP.CheckMacroName(PeekTok, MU_Other))
     return true;
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 43d31d6c5732..f6ca04defeb9 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -323,13 +323,16 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) {
 
 /// RegisterBuiltinMacro - Register the specified identifier in the identifier
 /// table and mark it as a builtin macro to be expanded.
-static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
+static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name,
+                                            bool Disabled = false) {
   // Get the identifier.
   IdentifierInfo *Id = PP.getIdentifierInfo(Name);
 
   // Mark it as being a macro that is builtin.
   MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
   MI->setIsBuiltinMacro();
+  if (Disabled)
+    MI->DisableMacro();
   PP.appendDefMacroDirective(Id, MI);
   return Id;
 }
@@ -343,6 +346,7 @@ void Preprocessor::RegisterBuiltinMacros() {
   Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__");
   Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__");
   Ident_Pragma  = RegisterBuiltinMacro(*this, "_Pragma");
+  Ident__VA_OPT__ = RegisterBuiltinMacro(*this, "__VA_OPT__", true);
 
   // C++ Standing Document Extensions.
   if (getLangOpts().CPlusPlus)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 94f1ce91f884..9baba204b324 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -115,23 +115,20 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
 
   BuiltinInfo = std::make_unique<Builtin::Context>();
 
-  // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
-  // a macro. They get unpoisoned where it is allowed.
-  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
-  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
-  if (getLangOpts().CPlusPlus20) {
-    (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
-    SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
-  } else {
-    Ident__VA_OPT__ = nullptr;
-  }
-
   // Initialize the pragma handlers.
   RegisterBuiltinPragmas();
 
   // Initialize builtin macros like __LINE__ and friends.
   RegisterBuiltinMacros();
 
+  // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
+  // a macro. They get unpoisoned where it is allowed. Note that we model
+  // __VA_OPT__ as a builtin macro to allow #ifdef and friends to detect it.
+  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
+  SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use);
+  Ident__VA_OPT__->setIsPoisoned();
+  SetPoisonReason(Ident__VA_OPT__, diag::ext_pp_bad_vaopt_use);
+
   if(LangOpts.Borland) {
     Ident__exception_info        = getIdentifierInfo("_exception_info");
     Ident___exception_info       = getIdentifierInfo("__exception_info");
diff --git a/clang/test/Preprocessor/macro_vaopt_check.cpp b/clang/test/Preprocessor/macro_vaopt_check.cpp
index fb52e9946af3..84f3b85871dd 100644
--- a/clang/test/Preprocessor/macro_vaopt_check.cpp
+++ b/clang/test/Preprocessor/macro_vaopt_check.cpp
@@ -1,4 +1,20 @@
-// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++2a
+// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++20
+// RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++11
+// RUN: %clang_cc1 -x c %s -Eonly -verify -Wno-all -pedantic -std=c99
+
+// Check that support for __VA_OPT__ can be detected by #ifdef.
+#ifndef __VA_OPT__
+#error should be defined
+#endif
+
+#ifdef __VA_OPT__
+#else
+#error should be defined
+#endif
+
+#if !defined(__VA_OPT__)
+#error should be defined
+#endif
 
 //expected-error@+1{{missing '('}}
 #define V1(...) __VA_OPT__  
@@ -62,3 +78,16 @@
 #define V1(...) __VA_OPT__  ((())
 #undef V1
 
+// __VA_OPT__ can't appear anywhere else.
+#if __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}}
+#endif
+
+#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}}
+
+// Check defined(__VA_OPT__) doesn't leave __VA_OPT__ poisoned.
+#define Z(...) (0 __VA_OPT__(|| 1))
+#if defined(__VA_OPT__) && Z(hello)
+// OK
+#else
+#error bad
+#endif
diff --git a/clang/test/Preprocessor/macro_vaopt_expand.cpp b/clang/test/Preprocessor/macro_vaopt_expand.cpp
index 7ec4f6128cfa..5eb0facb83f7 100644
--- a/clang/test/Preprocessor/macro_vaopt_expand.cpp
+++ b/clang/test/Preprocessor/macro_vaopt_expand.cpp
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -E %s -pedantic -std=c++2a | FileCheck -strict-whitespace %s
+// RUN: %clang_cc1 -E %s -pedantic -std=c++20 | FileCheck -strict-whitespace %s
+// RUN: %clang_cc1 -E %s -pedantic -std=c++11 | FileCheck -strict-whitespace %s
+// RUN: %clang_cc1 -E -x c %s -pedantic -std=c99 | FileCheck -strict-whitespace %s
 
 #define LPAREN ( 
 #define RPAREN ) 

From 9ea2a107ca4055a3a4960cb6dffb84b7f43bd8ea Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Wed, 27 Jan 2021 13:14:02 -0800
Subject: [PATCH 005/244] Don't allow __VA_OPT__ to be detected by #ifdef.

More study has discovered this to not actually be useful: because
current C++20 implementations reject `#ifdef __VA_OPT__`, this can't
really be used as a feature-test mechanism. And it's not too hard to
detect __VA_OPT__ without this, for example:

  #define THIRD_ARG(a, b, c, ...) c
  #define HAS_VA_OPT(...) THIRD_ARG(__VA_OPT__(,), 1, 0, )
  #if HAS_VA_OPT(?)

Partially reverts 0436ec2128c9775ba13b0308937238fc79673fdd.

(cherry picked from commit 5dfa37a76153f2a18ac7fe30721cc1332b672ea2)
---
 clang/include/clang/Lex/Preprocessor.h        | 19 --------------
 clang/lib/Lex/PPDirectives.cpp                |  5 ----
 clang/lib/Lex/PPExpressions.cpp               |  5 ----
 clang/lib/Lex/PPMacroExpansion.cpp            |  6 +----
 clang/lib/Lex/Preprocessor.cpp                | 15 ++++++-----
 clang/test/Preprocessor/macro_vaopt_check.cpp | 25 +++----------------
 6 files changed, 11 insertions(+), 64 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index ba8bdaa23c4c..68139cb24b31 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -447,25 +447,6 @@ class Preprocessor {
           ElseLoc(ElseLoc) {}
   };
 
-  class IfdefMacroNameScopeRAII {
-    Preprocessor &PP;
-    bool VAOPTWasPoisoned;
-
-  public:
-    IfdefMacroNameScopeRAII(Preprocessor &PP)
-        : PP(PP), VAOPTWasPoisoned(PP.Ident__VA_OPT__->isPoisoned()) {
-      PP.Ident__VA_OPT__->setIsPoisoned(false);
-    }
-    IfdefMacroNameScopeRAII(const IfdefMacroNameScopeRAII&) = delete;
-    IfdefMacroNameScopeRAII &operator=(const IfdefMacroNameScopeRAII&) = delete;
-    ~IfdefMacroNameScopeRAII() { Exit(); }
-
-    void Exit() {
-      if (VAOPTWasPoisoned)
-        PP.Ident__VA_OPT__->setIsPoisoned(true);
-    }
-  };
-
 private:
   friend class ASTReader;
   friend class MacroArgs;
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e2aa93455ea5..d6b03d85913d 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2928,14 +2928,9 @@ void Preprocessor::HandleIfdefDirective(Token &Result,
   ++NumIf;
   Token DirectiveTok = Result;
 
-  // __VA_OPT__ is allowed as the operand of #if[n]def.
-  IfdefMacroNameScopeRAII IfdefMacroNameScope(*this);
-
   Token MacroNameTok;
   ReadMacroName(MacroNameTok);
 
-  IfdefMacroNameScope.Exit();
-
   // Error reading macro name?  If so, diagnostic already issued.
   if (MacroNameTok.is(tok::eod)) {
     // Skip code until we get to #endif.  This helps with recovery by not
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 952fb8f121dc..8c120c13d7d2 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -104,9 +104,6 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
   SourceLocation beginLoc(PeekTok.getLocation());
   Result.setBegin(beginLoc);
 
-  // __VA_OPT__ is allowed as the operand of 'defined'.
-  Preprocessor::IfdefMacroNameScopeRAII IfdefMacroNameScope(PP);
-
   // Get the next token, don't expand it.
   PP.LexUnexpandedNonComment(PeekTok);
 
@@ -125,8 +122,6 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     PP.LexUnexpandedNonComment(PeekTok);
   }
 
-  IfdefMacroNameScope.Exit();
-
   // If we don't have a pp-identifier now, this is an error.
   if (PP.CheckMacroName(PeekTok, MU_Other))
     return true;
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index f6ca04defeb9..43d31d6c5732 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -323,16 +323,13 @@ void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) {
 
 /// RegisterBuiltinMacro - Register the specified identifier in the identifier
 /// table and mark it as a builtin macro to be expanded.
-static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name,
-                                            bool Disabled = false) {
+static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
   // Get the identifier.
   IdentifierInfo *Id = PP.getIdentifierInfo(Name);
 
   // Mark it as being a macro that is builtin.
   MacroInfo *MI = PP.AllocateMacroInfo(SourceLocation());
   MI->setIsBuiltinMacro();
-  if (Disabled)
-    MI->DisableMacro();
   PP.appendDefMacroDirective(Id, MI);
   return Id;
 }
@@ -346,7 +343,6 @@ void Preprocessor::RegisterBuiltinMacros() {
   Ident__TIME__ = RegisterBuiltinMacro(*this, "__TIME__");
   Ident__COUNTER__ = RegisterBuiltinMacro(*this, "__COUNTER__");
   Ident_Pragma  = RegisterBuiltinMacro(*this, "_Pragma");
-  Ident__VA_OPT__ = RegisterBuiltinMacro(*this, "__VA_OPT__", true);
 
   // C++ Standing Document Extensions.
   if (getLangOpts().CPlusPlus)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 9baba204b324..177786d90390 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -115,20 +115,19 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
 
   BuiltinInfo = std::make_unique<Builtin::Context>();
 
+  // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
+  // a macro. They get unpoisoned where it is allowed.
+  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
+  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
+  (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
+  SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
+
   // Initialize the pragma handlers.
   RegisterBuiltinPragmas();
 
   // Initialize builtin macros like __LINE__ and friends.
   RegisterBuiltinMacros();
 
-  // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
-  // a macro. They get unpoisoned where it is allowed. Note that we model
-  // __VA_OPT__ as a builtin macro to allow #ifdef and friends to detect it.
-  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
-  SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use);
-  Ident__VA_OPT__->setIsPoisoned();
-  SetPoisonReason(Ident__VA_OPT__, diag::ext_pp_bad_vaopt_use);
-
   if(LangOpts.Borland) {
     Ident__exception_info        = getIdentifierInfo("_exception_info");
     Ident___exception_info       = getIdentifierInfo("__exception_info");
diff --git a/clang/test/Preprocessor/macro_vaopt_check.cpp b/clang/test/Preprocessor/macro_vaopt_check.cpp
index 84f3b85871dd..c5c0ac518bc0 100644
--- a/clang/test/Preprocessor/macro_vaopt_check.cpp
+++ b/clang/test/Preprocessor/macro_vaopt_check.cpp
@@ -2,20 +2,6 @@
 // RUN: %clang_cc1 %s -Eonly -verify -Wno-all -pedantic -std=c++11
 // RUN: %clang_cc1 -x c %s -Eonly -verify -Wno-all -pedantic -std=c99
 
-// Check that support for __VA_OPT__ can be detected by #ifdef.
-#ifndef __VA_OPT__
-#error should be defined
-#endif
-
-#ifdef __VA_OPT__
-#else
-#error should be defined
-#endif
-
-#if !defined(__VA_OPT__)
-#error should be defined
-#endif
-
 //expected-error@+1{{missing '('}}
 #define V1(...) __VA_OPT__  
 #undef V1
@@ -82,12 +68,7 @@
 #if __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}}
 #endif
 
-#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}}
-
-// Check defined(__VA_OPT__) doesn't leave __VA_OPT__ poisoned.
-#define Z(...) (0 __VA_OPT__(|| 1))
-#if defined(__VA_OPT__) && Z(hello)
-// OK
-#else
-#error bad
+#ifdef __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}}
 #endif
+
+#define BAD __VA_OPT__ // expected-warning {{__VA_OPT__ can only appear in the expansion of a variadic macro}}

From 9df2b64fc5fa911ca59b3f646806ca3fd6787c2d Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Wed, 27 Jan 2021 16:07:51 -0800
Subject: [PATCH 006/244] [cxx_status] Mark P0732R2 as only 'partial', not
 'Clang 12', as some of the changes were reverted.

(cherry picked from commit 727fc31a9898dfb89610ca1bc05ff86204a77177)
---
 clang/www/cxx_status.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index 685f32dbe0d3..fc3340ec9d96 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -1005,7 +1005,7 @@ <h2 id="cxx20">C++20 implementation status</h2>
     <tr>
       <td rowspan="2">Class types as non-type template parameters</td>
       <td><a href="https://wg21.link/p0732r2">P0732R2</a></td>
-      <td rowspan="2" class="unreleased" align="center">Clang 12</td>
+      <td rowspan="2" class="partial" align="center">Partial</td>
     </tr>
       <tr> <!-- from Belfast -->
         <td><a href="https://wg21.link/p1907r1">P1907R1</a></td>

From 8d22f25d155113f9cfdf3952dc49088c820f2a77 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 27 Jan 2021 16:28:04 -0800
Subject: [PATCH 007/244] [llvm-c] Move LLVMX86_AMXTypeKind &
 LLVMPoisonValueValueKind to the bottom to avoid value changes compared with
 LLVM<=11

Fixes PR48905

(cherry picked from commit 6612c2bb68becda5504099b48082c844503c6d4c)
---
 llvm/include/llvm-c/Core.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 8274213aa839..a78df16ca404 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -160,10 +160,10 @@ typedef enum {
   LLVMVectorTypeKind,    /**< Fixed width SIMD vector type */
   LLVMMetadataTypeKind,  /**< Metadata */
   LLVMX86_MMXTypeKind,   /**< X86 MMX */
-  LLVMX86_AMXTypeKind,   /**< X86 AMX */
   LLVMTokenTypeKind,     /**< Tokens */
   LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */
-  LLVMBFloatTypeKind     /**< 16 bit brain floating point type */
+  LLVMBFloatTypeKind,    /**< 16 bit brain floating point type */
+  LLVMX86_AMXTypeKind    /**< X86 AMX */
 } LLVMTypeKind;
 
 typedef enum {
@@ -270,7 +270,6 @@ typedef enum {
   LLVMConstantVectorValueKind,
 
   LLVMUndefValueValueKind,
-  LLVMPoisonValueValueKind,
   LLVMConstantAggregateZeroValueKind,
   LLVMConstantDataArrayValueKind,
   LLVMConstantDataVectorValueKind,
@@ -283,6 +282,7 @@ typedef enum {
   LLVMInlineAsmValueKind,
 
   LLVMInstructionValueKind,
+  LLVMPoisonValueValueKind
 } LLVMValueKind;
 
 typedef enum {

From 8364f5369eeeb2da8db2bae7716c549930d8df93 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 27 Jan 2021 10:59:28 -0800
Subject: [PATCH 008/244] Revert "Suppress non-conforming GNU paste extension
 in all standard-conforming modes"

This reverts commit f4537935dcdbf390c863591cf556e76c3abab9c1.
This reverts commit b43c26d036dcbf7a6881f39e4434cf059364022a.

This GNU and MSVC extension turns out to be very popular. Most projects
are not using C++20, so cannot use the new __VA_OPT__ feature to be
standards conformant. The other workaround, using -std=gnu*, enables too
many language extensions and isn't viable.

Until there is a way for users to get the behavior provided by the
`, ## __VA_ARGS__` extension in the -std=c++17 and earlier language
modes, we need to revert this.

(cherry picked from commit 61a66e4b5ec18e9e73c2f6334f6b7f7dd4bca77e)
---
 clang/lib/Lex/TokenLexer.cpp                      | 10 +++++-----
 clang/test/Preprocessor/macro_fn_comma_swallow2.c |  5 -----
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 97cb2cf0bb8c..da5681aaf478 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -148,12 +148,12 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
     return false;
 
   // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
-  // __VA_ARGS__ is empty, but not in strict mode where there are no
-  // named arguments, where it remains.  With GNU extensions, it is removed
-  // regardless of named arguments.
+  // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
+  // named arguments, where it remains.  In all other modes, including C99
+  // with GNU extensions, it is removed regardless of named arguments.
   // Microsoft also appears to support this extension, unofficially.
-  if (!PP.getLangOpts().GNUMode && !PP.getLangOpts().MSVCCompat &&
-      Macro->getNumParams() < 2)
+  if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
+        && Macro->getNumParams() < 2)
     return false;
 
   // Is a comma available to be removed?
diff --git a/clang/test/Preprocessor/macro_fn_comma_swallow2.c b/clang/test/Preprocessor/macro_fn_comma_swallow2.c
index 4e4960ca7f18..93ab2b83664a 100644
--- a/clang/test/Preprocessor/macro_fn_comma_swallow2.c
+++ b/clang/test/Preprocessor/macro_fn_comma_swallow2.c
@@ -1,16 +1,11 @@
 // Test the __VA_ARGS__ comma swallowing extensions of various compiler dialects.
 
 // RUN: %clang_cc1 -E %s | FileCheck -check-prefix=GCC -strict-whitespace %s
-// RUN: %clang_cc1 -E -std=c90 %s | FileCheck -check-prefix=C99 -strict-whitespace %s
 // RUN: %clang_cc1 -E -std=c99 %s | FileCheck -check-prefix=C99 -strict-whitespace %s
 // RUN: %clang_cc1 -E -std=c11 %s | FileCheck -check-prefix=C99 -strict-whitespace %s
 // RUN: %clang_cc1 -E -x c++ %s | FileCheck -check-prefix=GCC -strict-whitespace %s
-// RUN: %clang_cc1 -E -x c++ -std=c++03 %s | FileCheck -check-prefix=C99 -strict-whitespace %s
-// RUN: %clang_cc1 -E -x c++ -std=c++11 %s | FileCheck -check-prefix=C99 -strict-whitespace %s
 // RUN: %clang_cc1 -E -std=gnu99 %s | FileCheck -check-prefix=GCC -strict-whitespace %s
 // RUN: %clang_cc1 -E -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s
-// RUN: %clang_cc1 -E -x c++ -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s
-// RUN: %clang_cc1 -E -x c++ -std=c++11 -fms-compatibility %s | FileCheck -check-prefix=MS -strict-whitespace %s
 // RUN: %clang_cc1 -E -DNAMED %s | FileCheck -check-prefix=GCC -strict-whitespace %s
 // RUN: %clang_cc1 -E -std=c99 -DNAMED %s | FileCheck -check-prefix=C99 -strict-whitespace %s
 

From b0085d205b3063c332a080599830ef0500cb6924 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight@google.com>
Date: Mon, 7 Dec 2020 10:26:49 -0500
Subject: [PATCH 009/244] Itanium Mangling: Mangle `__alignof__` differently
 than `alignof`.

The two operations have acted differently since Clang 8, but were
unfortunately mangled the same. The new mangling uses new "vendor
extended expression" syntax proposed in
https://github.com/itanium-cxx-abi/cxx-abi/issues/112

GCC had the same mangling problem, https://gcc.gnu.org/PR88115, and
will hopefully be switching to the same mangling as implemented here.

Additionally, fix the mangling of `__uuidof` to use the new extension
syntax, instead of its previous nonstandard special-case.

Adjusts the demangler accordingly.

Differential Revision: https://reviews.llvm.org/D93922

(cherry picked from commit 9c7aeaebb3ac1b94200b59b111742cb6b8f090c2)
---
 clang/lib/AST/ItaniumMangle.cpp               | 103 ++++++++++++------
 clang/test/CodeGenCXX/mangle-alignof.cpp      |  25 +++++
 .../CodeGenCXX/microsoft-uuidof-mangling.cpp  |  44 +++++---
 libcxxabi/src/demangle/ItaniumDemangle.h      |  68 ++++++------
 libcxxabi/test/test_demangle.pass.cpp         |  14 ++-
 llvm/include/llvm/Demangle/ItaniumDemangle.h  |  68 ++++++------
 6 files changed, 211 insertions(+), 111 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/mangle-alignof.cpp

diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 6c8d5687c64a..668733a4be34 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -558,6 +558,7 @@ class CXXNameMangler {
                           unsigned NumTemplateArgs);
   void mangleTemplateArgs(TemplateName TN, const TemplateArgumentList &AL);
   void mangleTemplateArg(TemplateArgument A, bool NeedExactType);
+  void mangleTemplateArgExpr(const Expr *E);
   void mangleValueInTemplateArg(QualType T, const APValue &V, bool TopLevel,
                                 bool NeedExactType = false);
 
@@ -3528,8 +3529,8 @@ void CXXNameMangler::mangleType(const DependentSizedMatrixType *T) {
   Out << "u" << VendorQualifier.size() << VendorQualifier;
 
   Out << "I";
-  mangleTemplateArg(T->getRowExpr(), false);
-  mangleTemplateArg(T->getColumnExpr(), false);
+  mangleTemplateArgExpr(T->getRowExpr());
+  mangleTemplateArgExpr(T->getColumnExpr());
   mangleType(T->getElementType());
   Out << "E";
 }
@@ -3916,6 +3917,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   //              ::= ds <expression> <expression>                   # expr.*expr
   //              ::= sZ <template-param>                            # size of a parameter pack
   //              ::= sZ <function-param>    # size of a function parameter pack
+  //              ::= u <source-name> <template-arg>* E # vendor extended expression
   //              ::= <expr-primary>
   // <expr-primary> ::= L <type> <value number> E    # integer literal
   //                ::= L <type <value float> E      # floating literal
@@ -4007,14 +4009,26 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
 
   case Expr::CXXUuidofExprClass: {
     const CXXUuidofExpr *UE = cast<CXXUuidofExpr>(E);
-    if (UE->isTypeOperand()) {
-      QualType UuidT = UE->getTypeOperand(Context.getASTContext());
-      Out << "u8__uuidoft";
-      mangleType(UuidT);
+    // As of clang 12, uuidof uses the vendor extended expression
+    // mangling. Previously, it used a special-cased nonstandard extension.
+    if (Context.getASTContext().getLangOpts().getClangABICompat() >
+        LangOptions::ClangABI::Ver11) {
+      Out << "u8__uuidof";
+      if (UE->isTypeOperand())
+        mangleType(UE->getTypeOperand(Context.getASTContext()));
+      else
+        mangleTemplateArgExpr(UE->getExprOperand());
+      Out << 'E';
     } else {
-      Expr *UuidExp = UE->getExprOperand();
-      Out << "u8__uuidofz";
-      mangleExpression(UuidExp, Arity);
+      if (UE->isTypeOperand()) {
+        QualType UuidT = UE->getTypeOperand(Context.getASTContext());
+        Out << "u8__uuidoft";
+        mangleType(UuidT);
+      } else {
+        Expr *UuidExp = UE->getExprOperand();
+        Out << "u8__uuidofz";
+        mangleExpression(UuidExp, Arity);
+      }
     }
     break;
   }
@@ -4312,13 +4326,39 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
       break;
     }
 
+    auto MangleAlignofSizeofArg = [&] {
+      if (SAE->isArgumentType()) {
+        Out << 't';
+        mangleType(SAE->getArgumentType());
+      } else {
+        Out << 'z';
+        mangleExpression(SAE->getArgumentExpr());
+      }
+    };
+
     switch(SAE->getKind()) {
     case UETT_SizeOf:
       Out << 's';
+      MangleAlignofSizeofArg();
       break;
     case UETT_PreferredAlignOf:
+      // As of clang 12, we mangle __alignof__ differently than alignof. (They
+      // have acted differently since Clang 8, but were previously mangled the
+      // same.)
+      if (Context.getASTContext().getLangOpts().getClangABICompat() >
+          LangOptions::ClangABI::Ver11) {
+        Out << "u11__alignof__";
+        if (SAE->isArgumentType())
+          mangleType(SAE->getArgumentType());
+        else
+          mangleTemplateArgExpr(SAE->getArgumentExpr());
+        Out << 'E';
+        break;
+      }
+      LLVM_FALLTHROUGH;
     case UETT_AlignOf:
       Out << 'a';
+      MangleAlignofSizeofArg();
       break;
     case UETT_VecStep: {
       DiagnosticsEngine &Diags = Context.getDiags();
@@ -4336,13 +4376,6 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
       return;
     }
     }
-    if (SAE->isArgumentType()) {
-      Out << 't';
-      mangleType(SAE->getArgumentType());
-    } else {
-      Out << 'z';
-      mangleExpression(SAE->getArgumentExpr());
-    }
     break;
   }
 
@@ -4971,23 +5004,7 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
     mangleType(A.getAsTemplateOrTemplatePattern());
     break;
   case TemplateArgument::Expression: {
-    // It's possible to end up with a DeclRefExpr here in certain
-    // dependent cases, in which case we should mangle as a
-    // declaration.
-    const Expr *E = A.getAsExpr()->IgnoreParenImpCasts();
-    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
-      const ValueDecl *D = DRE->getDecl();
-      if (isa<VarDecl>(D) || isa<FunctionDecl>(D)) {
-        Out << 'L';
-        mangle(D);
-        Out << 'E';
-        break;
-      }
-    }
-
-    Out << 'X';
-    mangleExpression(E);
-    Out << 'E';
+    mangleTemplateArgExpr(A.getAsExpr());
     break;
   }
   case TemplateArgument::Integral:
@@ -5044,6 +5061,26 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
   }
 }
 
+void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) {
+  // It's possible to end up with a DeclRefExpr here in certain
+  // dependent cases, in which case we should mangle as a
+  // declaration.
+  E = E->IgnoreParenImpCasts();
+  if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
+    const ValueDecl *D = DRE->getDecl();
+    if (isa<VarDecl>(D) || isa<FunctionDecl>(D)) {
+      Out << 'L';
+      mangle(D);
+      Out << 'E';
+      return;
+    }
+  }
+
+  Out << 'X';
+  mangleExpression(E);
+  Out << 'E';
+}
+
 /// Determine whether a given value is equivalent to zero-initialization for
 /// the purpose of discarding a trailing portion of a 'tl' mangling.
 ///
diff --git a/clang/test/CodeGenCXX/mangle-alignof.cpp b/clang/test/CodeGenCXX/mangle-alignof.cpp
new file mode 100644
index 000000000000..0a65c7e87a2d
--- /dev/null
+++ b/clang/test/CodeGenCXX/mangle-alignof.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -std=c++11 -Wno-gnu-alignof-expression -emit-llvm %s -o - -triple=%itanium_abi_triple | FileCheck %s --check-prefix=CHECK-NEW
+// RUN: %clang_cc1 -std=c++11 -Wno-gnu-alignof-expression -emit-llvm %s -o - -triple=%itanium_abi_triple -fclang-abi-compat=11 | FileCheck %s --check-prefix=CHECK-OLD
+
+// Verify the difference in mangling for alignof and __alignof__ in a new ABI
+// compat mode.
+
+template <class T> void f1(decltype(alignof(T))) {}
+template void f1<int>(__SIZE_TYPE__);
+// CHECK-OLD: void @_Z2f1IiEvDTatT_E
+// CHECK-NEW: void @_Z2f1IiEvDTatT_E
+
+template <class T> void f2(decltype(__alignof__(T))) {}
+template void f2<int>(__SIZE_TYPE__);
+// CHECK-OLD: void @_Z2f2IiEvDTatT_E
+// CHECK-NEW: void @_Z2f2IiEvDTu11__alignof__T_EE
+
+template <class T> void f3(decltype(alignof(T(0)))) {}
+template void f3<int>(__SIZE_TYPE__);
+// CHECK-OLD: void @_Z2f3IiEvDTazcvT_Li0EE
+// CHECK-NEW: void @_Z2f3IiEvDTazcvT_Li0EE
+
+template <class T> void f4(decltype(__alignof__(T(0)))) {}
+template void f4<int>(__SIZE_TYPE__);
+// CHECK-OLD: void @_Z2f4IiEvDTazcvT_Li0EE
+// CHECK-NEW: void @_Z2f4IiEvDTu11__alignof__XcvT_Li0EEEE
diff --git a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp
index ec26be292acc..321f65cacc71 100644
--- a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp
+++ b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions | FileCheck %s --check-prefixes=CHECK,CHECK-V12
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-unknown-unknown -fms-extensions -fclang-abi-compat=11 | FileCheck %s --check-prefixes=CHECK,CHECK-V11
 // rdar://17784718
 
 typedef struct _GUID
@@ -24,11 +25,16 @@ struct __declspec(uuid("EAFA1952-66F8-438B-8FBA-AF1BBAE42191")) TestStruct
 
 struct __declspec(uuid("EAFA1952-66F8-438B-8FBA-AF1BBAE42191")) OtherStruct {};
 
-template <class T> void test_uuidofType(void *arg[sizeof(__uuidof(T))] = 0) {}
+template <class T> void test_uuidofType(decltype(__uuidof(T)) arg) {}
 
-template <class T> void test_uuidofExpr(void *arg[sizeof(__uuidof(typename T::member))] = 0) {}
+template <class T> void test_uuidofExpr(decltype(__uuidof(T::member)) arg) {}
 
-struct HasMember { typedef TestStruct member; };
+struct HasMember {
+  TestStruct member;
+};
+
+// Ensure that mangling an "expr-primary" argument is handled properly.
+template <class T> void test_uuidofExpr2(decltype(T{}, __uuidof(HasMember::member)) arg) {}
 
 template<const GUID&> struct UUIDTestTwo { UUIDTestTwo(); };
 
@@ -39,19 +45,29 @@ int main(int argc, const char * argv[])
     // type had better not mention TestStruct or OtherStruct!
     UUIDTestTwo<__uuidof(TestStruct)> uuidof_test2;
     UUIDTestTwo<__uuidof(OtherStruct)> uuidof_test3;
-    test_uuidofType<TestStruct>();
-    test_uuidofExpr<HasMember>();
+    test_uuidofType<TestStruct>(GUID{});
+    test_uuidofExpr<HasMember>(GUID{});
+    test_uuidofExpr2<TestStruct>(GUID{});
     return 0;
 }
 
 // CHECK: define{{.*}} i32 @main
-// CHECK: call void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev
-// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev
-// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev
-// CHECK: call void @_Z15test_uuidofTypeI10TestStructEvPPv(i8** null)
-// CHECK: call void @_Z15test_uuidofExprI9HasMemberEvPPv(i8** null)
-
+// CHECK: call void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev(
+// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev(
+// CHECK: call void @_ZN11UUIDTestTwoIL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev(
+// CHECK-V11: call void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E(
+// CHECK-V12: call void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE(
+// CHECK-V11: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE(
+// CHECK-V12: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE(
+// CHECK-V11: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE(
+// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE(
+//    TODO: the above mangling is wrong -- the X/E shouldn't be emitted:       ^                     ^
 // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev
-// CHECK: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvPPv
-// CHECK: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvPPv
+// CHECK-V11: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E(
+// CHECK-V12: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE(
+// CHECK-V11: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE(
+// CHECK-V12: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE(
+// CHECK-V11: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE(
+// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE(
+//    TODO: the above mangling is wrong -- the X/E shouldn't be emitted:                      ^                     ^
 // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC2Ev
diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 6bfc02d15379..e5fca98f9271 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -96,7 +96,6 @@
     X(InitListExpr) \
     X(FoldExpr) \
     X(ThrowExpr) \
-    X(UUIDOfExpr) \
     X(BoolExpr) \
     X(StringLiteral) \
     X(LambdaExpr) \
@@ -2035,21 +2034,6 @@ class ThrowExpr : public Node {
   }
 };
 
-// MSVC __uuidof extension, generated by clang in -fms-extensions mode.
-class UUIDOfExpr : public Node {
-  Node *Operand;
-public:
-  UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {}
-
-  template<typename Fn> void match(Fn F) const { F(Operand); }
-
-  void printLeft(OutputStream &S) const override {
-    S << "__uuidof(";
-    Operand->print(S);
-    S << ")";
-  }
-};
-
 class BoolExpr : public Node {
   bool Value;
 
@@ -5013,6 +4997,43 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
     }
     }
     return nullptr;
+  case 'u': {
+    ++First;
+    Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr);
+    if (!Name)
+      return nullptr;
+    // Special case legacy __uuidof mangling. The 't' and 'z' appear where the
+    // standard encoding expects a <template-arg>, and would otherwise be
+    // interpreted as <type> node 'short' or 'ellipsis'. However, neither
+    // __uuidof(short) nor __uuidof(...) can actually appear, so there is no
+    // actual conflict here.
+    if (Name->getBaseName() == "__uuidof") {
+      if (numLeft() < 2)
+        return nullptr;
+      if (*First == 't') {
+        ++First;
+        Node *Ty = getDerived().parseType();
+        if (!Ty)
+          return nullptr;
+        return make<CallExpr>(Name, makeNodeArray(&Ty, &Ty + 1));
+      }
+      if (*First == 'z') {
+        ++First;
+        Node *Ex = getDerived().parseExpr();
+        if (!Ex)
+          return nullptr;
+        return make<CallExpr>(Name, makeNodeArray(&Ex, &Ex + 1));
+      }
+    }
+    size_t ExprsBegin = Names.size();
+    while (!consumeIf('E')) {
+      Node *E = getDerived().parseTemplateArg();
+      if (E == nullptr)
+        return E;
+      Names.push_back(E);
+    }
+    return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin));
+  }
   case '1':
   case '2':
   case '3':
@@ -5024,21 +5045,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
   case '9':
     return getDerived().parseUnresolvedName();
   }
-
-  if (consumeIf("u8__uuidoft")) {
-    Node *Ty = getDerived().parseType();
-    if (!Ty)
-      return nullptr;
-    return make<UUIDOfExpr>(Ty);
-  }
-
-  if (consumeIf("u8__uuidofz")) {
-    Node *Ex = getDerived().parseExpr();
-    if (!Ex)
-      return nullptr;
-    return make<UUIDOfExpr>(Ex);
-  }
-
   return nullptr;
 }
 
diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index 3954fdba048e..512cc3928fdd 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -29776,8 +29776,18 @@ const char* cases[][2] =
     // Vendor extension types are substitution candidates.
     {"_Z1fu3fooS_", "f(foo, foo)"},
 
-    {"_ZN3FooIXu8__uuidofzdeL_Z3sucEEEC1Ev", "Foo<__uuidof(*(suc))>::Foo()"},
-    {"_ZN3FooIXu8__uuidoft13SomeUUIDClassEEC1Ev", "Foo<__uuidof(SomeUUIDClass)>::Foo()"},
+    // alignof with type and expression, and __alignof__ with the same.
+    {"_Z2f1IiEvDTatT_E", "void f1<int>(decltype(alignof (int)))"},
+    {"_Z2f3IiEvDTazcvT_Li0EE", "void f3<int>(decltype(alignof ((int)(0))))"},
+    {"_Z2f2IiEvDTu11__alignof__T_EE", "void f2<int>(decltype(__alignof__(int)))"},
+    {"_Z2f4IiEvDTu11__alignof__XcvT_Li0EEEE", "void f4<int>(decltype(__alignof__((int)(0))))"},
+
+    // Legacy nonstandard mangling for __uuidof ('t' = type operand, 'z' = expression operand).
+    {"_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E", "void test_uuidofType<TestStruct>(decltype(__uuidof(TestStruct)))"},
+    {"_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE", "void test_uuidofExpr<HasMember>(decltype(__uuidof(HasMember::member)))"},
+    // Current __uuidof mangling using vendor extended expression.
+    {"_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE", "void test_uuidofType<TestStruct>(decltype(__uuidof(TestStruct)))"},
+    {"_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE", "void test_uuidofExpr<HasMember>(decltype(__uuidof(HasMember::member)))"},
 
     // C++2a char8_t:
     {"_ZTSPDu", "typeinfo name for char8_t*"},
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 6bfc02d15379..e5fca98f9271 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -96,7 +96,6 @@
     X(InitListExpr) \
     X(FoldExpr) \
     X(ThrowExpr) \
-    X(UUIDOfExpr) \
     X(BoolExpr) \
     X(StringLiteral) \
     X(LambdaExpr) \
@@ -2035,21 +2034,6 @@ class ThrowExpr : public Node {
   }
 };
 
-// MSVC __uuidof extension, generated by clang in -fms-extensions mode.
-class UUIDOfExpr : public Node {
-  Node *Operand;
-public:
-  UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {}
-
-  template<typename Fn> void match(Fn F) const { F(Operand); }
-
-  void printLeft(OutputStream &S) const override {
-    S << "__uuidof(";
-    Operand->print(S);
-    S << ")";
-  }
-};
-
 class BoolExpr : public Node {
   bool Value;
 
@@ -5013,6 +4997,43 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
     }
     }
     return nullptr;
+  case 'u': {
+    ++First;
+    Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr);
+    if (!Name)
+      return nullptr;
+    // Special case legacy __uuidof mangling. The 't' and 'z' appear where the
+    // standard encoding expects a <template-arg>, and would otherwise be
+    // interpreted as <type> node 'short' or 'ellipsis'. However, neither
+    // __uuidof(short) nor __uuidof(...) can actually appear, so there is no
+    // actual conflict here.
+    if (Name->getBaseName() == "__uuidof") {
+      if (numLeft() < 2)
+        return nullptr;
+      if (*First == 't') {
+        ++First;
+        Node *Ty = getDerived().parseType();
+        if (!Ty)
+          return nullptr;
+        return make<CallExpr>(Name, makeNodeArray(&Ty, &Ty + 1));
+      }
+      if (*First == 'z') {
+        ++First;
+        Node *Ex = getDerived().parseExpr();
+        if (!Ex)
+          return nullptr;
+        return make<CallExpr>(Name, makeNodeArray(&Ex, &Ex + 1));
+      }
+    }
+    size_t ExprsBegin = Names.size();
+    while (!consumeIf('E')) {
+      Node *E = getDerived().parseTemplateArg();
+      if (E == nullptr)
+        return E;
+      Names.push_back(E);
+    }
+    return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin));
+  }
   case '1':
   case '2':
   case '3':
@@ -5024,21 +5045,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
   case '9':
     return getDerived().parseUnresolvedName();
   }
-
-  if (consumeIf("u8__uuidoft")) {
-    Node *Ty = getDerived().parseType();
-    if (!Ty)
-      return nullptr;
-    return make<UUIDOfExpr>(Ty);
-  }
-
-  if (consumeIf("u8__uuidofz")) {
-    Node *Ex = getDerived().parseExpr();
-    if (!Ex)
-      return nullptr;
-    return make<UUIDOfExpr>(Ex);
-  }
-
   return nullptr;
 }
 

From 7da92afbf08e90960f7e5dee00bbf6ef8f323a5c Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight@google.com>
Date: Sun, 24 Jan 2021 15:50:15 -0500
Subject: [PATCH 010/244] Itanium Mangling: Fix handling of <expr-primary> in
 <template-arg>.

Previously, we were emitting an extraneous X .. E in <template-arg>
around an <expr-primary> if the template argument was constructed from
an expression (rather than an already-evaluated literal value).  In
such a case, we would then e.g. emit 'XLi0EE' instead of 'Li0E'.

We had one special-case for DeclRefExpr expressions, in particular, to
emit them in the mangled name without the surrounding X/E. However,
unfortunately, that special case also triggered for ParmVarDecl (a
subtype of VarDecl), and _incorrectly_ emitted 'L_Z .. E' instead of
the proper 'Xfp_E'.

This change causes mangleExpression itself to be responsible for
emitting X/E around non-primary expressions, which removes the
special-case, and corrects both these problems.

Differential Revision: https://reviews.llvm.org/D95487

(cherry picked from commit 8ca33605ff0cfc536f5c6710fb5f6378bf11959a)
---
 clang/lib/AST/ItaniumMangle.cpp               | 223 +++++++++++++-----
 clang/test/CodeGenCXX/clang-abi-compat.cpp    |  94 +++++++-
 clang/test/CodeGenCXX/mangle-abi-tag.cpp      |   2 +-
 clang/test/CodeGenCXX/mangle-concept.cpp      |   4 +-
 clang/test/CodeGenCXX/mangle-template.cpp     |   4 +-
 clang/test/CodeGenCXX/mangle.cpp              |   2 +-
 clang/test/CodeGenCXX/matrix-type.cpp         |  16 +-
 .../CodeGenCXX/microsoft-uuidof-mangling.cpp  |   6 +-
 8 files changed, 259 insertions(+), 92 deletions(-)

diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 668733a4be34..54e2f361a9f1 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -546,8 +546,8 @@ class CXXNameMangler {
                         unsigned knownArity);
   void mangleCastExpression(const Expr *E, StringRef CastEncoding);
   void mangleInitListElements(const InitListExpr *InitList);
-  void mangleDeclRefExpr(const NamedDecl *D);
-  void mangleExpression(const Expr *E, unsigned Arity = UnknownArity);
+  void mangleExpression(const Expr *E, unsigned Arity = UnknownArity,
+                        bool AsTemplateArg = false);
   void mangleCXXCtorType(CXXCtorType T, const CXXRecordDecl *InheritedFrom);
   void mangleCXXDtorType(CXXDtorType T);
 
@@ -3872,33 +3872,8 @@ void CXXNameMangler::mangleInitListElements(const InitListExpr *InitList) {
     mangleExpression(InitList->getInit(i));
 }
 
-void CXXNameMangler::mangleDeclRefExpr(const NamedDecl *D) {
-  switch (D->getKind()) {
-  default:
-    //  <expr-primary> ::= L <mangled-name> E # external name
-    Out << 'L';
-    mangle(D);
-    Out << 'E';
-    break;
-
-  case Decl::ParmVar:
-    mangleFunctionParam(cast<ParmVarDecl>(D));
-    break;
-
-  case Decl::EnumConstant: {
-    const EnumConstantDecl *ED = cast<EnumConstantDecl>(D);
-    mangleIntegerLiteral(ED->getType(), ED->getInitVal());
-    break;
-  }
-
-  case Decl::NonTypeTemplateParm:
-    const NonTypeTemplateParmDecl *PD = cast<NonTypeTemplateParmDecl>(D);
-    mangleTemplateParameter(PD->getDepth(), PD->getIndex());
-    break;
-  }
-}
-
-void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
+void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
+                                      bool AsTemplateArg) {
   // <expression> ::= <unary operator-name> <expression>
   //              ::= <binary operator-name> <expression> <expression>
   //              ::= <trinary operator-name> <expression> <expression> <expression>
@@ -3912,6 +3887,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   //              ::= at <type>                      # alignof (a type)
   //              ::= <template-param>
   //              ::= <function-param>
+  //              ::= fpT                            # 'this' expression (part of <function-param>)
   //              ::= sr <type> <unqualified-name>                   # dependent name
   //              ::= sr <type> <unqualified-name> <template-args>   # dependent template-id
   //              ::= ds <expression> <expression>                   # expr.*expr
@@ -3920,11 +3896,55 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   //              ::= u <source-name> <template-arg>* E # vendor extended expression
   //              ::= <expr-primary>
   // <expr-primary> ::= L <type> <value number> E    # integer literal
-  //                ::= L <type <value float> E      # floating literal
+  //                ::= L <type> <value float> E     # floating literal
+  //                ::= L <type> <string type> E     # string literal
+  //                ::= L <nullptr type> E           # nullptr literal "LDnE"
+  //                ::= L <pointer type> 0 E         # null pointer template argument
+  //                ::= L <type> <real-part float> _ <imag-part float> E    # complex floating point literal (C99); not used by clang
   //                ::= L <mangled-name> E           # external name
-  //                ::= fpT                          # 'this' expression
   QualType ImplicitlyConvertedToType;
 
+  // A top-level expression that's not <expr-primary> needs to be wrapped in
+  // X...E in a template arg.
+  bool IsPrimaryExpr = true;
+  auto NotPrimaryExpr = [&] {
+    if (AsTemplateArg && IsPrimaryExpr)
+      Out << 'X';
+    IsPrimaryExpr = false;
+  };
+
+  auto MangleDeclRefExpr = [&](const NamedDecl *D) {
+    switch (D->getKind()) {
+    default:
+      //  <expr-primary> ::= L <mangled-name> E # external name
+      Out << 'L';
+      mangle(D);
+      Out << 'E';
+      break;
+
+    case Decl::ParmVar:
+      NotPrimaryExpr();
+      mangleFunctionParam(cast<ParmVarDecl>(D));
+      break;
+
+    case Decl::EnumConstant: {
+      // <expr-primary>
+      const EnumConstantDecl *ED = cast<EnumConstantDecl>(D);
+      mangleIntegerLiteral(ED->getType(), ED->getInitVal());
+      break;
+    }
+
+    case Decl::NonTypeTemplateParm:
+      NotPrimaryExpr();
+      const NonTypeTemplateParmDecl *PD = cast<NonTypeTemplateParmDecl>(D);
+      mangleTemplateParameter(PD->getDepth(), PD->getIndex());
+      break;
+    }
+  };
+
+  // 'goto recurse' is used when handling a simple "unwrapping" node which
+  // produces no output, where ImplicitlyConvertedToType and AsTemplateArg need
+  // to be preserved.
 recurse:
   switch (E->getStmtClass()) {
   case Expr::NoStmtClass:
@@ -3996,6 +4016,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   case Expr::SourceLocExprClass:
   case Expr::BuiltinBitCastExprClass:
   {
+    NotPrimaryExpr();
     if (!NullOut) {
       // As bad as this diagnostic is, it's better than crashing.
       DiagnosticsEngine &Diags = Context.getDiags();
@@ -4003,11 +4024,13 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
                                        "cannot yet mangle expression type %0");
       Diags.Report(E->getExprLoc(), DiagID)
         << E->getStmtClassName() << E->getSourceRange();
+      return;
     }
     break;
   }
 
   case Expr::CXXUuidofExprClass: {
+    NotPrimaryExpr();
     const CXXUuidofExpr *UE = cast<CXXUuidofExpr>(E);
     // As of clang 12, uuidof uses the vendor extended expression
     // mangling. Previously, it used a special-cased nonstandard extension.
@@ -4027,7 +4050,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
       } else {
         Expr *UuidExp = UE->getExprOperand();
         Out << "u8__uuidofz";
-        mangleExpression(UuidExp, Arity);
+        mangleExpression(UuidExp);
       }
     }
     break;
@@ -4035,13 +4058,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
 
   // Even gcc-4.5 doesn't mangle this.
   case Expr::BinaryConditionalOperatorClass: {
+    NotPrimaryExpr();
     DiagnosticsEngine &Diags = Context.getDiags();
     unsigned DiagID =
       Diags.getCustomDiagID(DiagnosticsEngine::Error,
                 "?: operator with omitted middle operand cannot be mangled");
     Diags.Report(E->getExprLoc(), DiagID)
       << E->getStmtClassName() << E->getSourceRange();
-    break;
+    return;
   }
 
   // These are used for internal purposes and cannot be meaningfully mangled.
@@ -4049,6 +4073,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
     llvm_unreachable("cannot mangle opaque value; mangling wrong thing?");
 
   case Expr::InitListExprClass: {
+    NotPrimaryExpr();
     Out << "il";
     mangleInitListElements(cast<InitListExpr>(E));
     Out << "E";
@@ -4056,6 +4081,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::DesignatedInitExprClass: {
+    NotPrimaryExpr();
     auto *DIE = cast<DesignatedInitExpr>(E);
     for (const auto &Designator : DIE->designators()) {
       if (Designator.isFieldDesignator()) {
@@ -4077,27 +4103,27 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXDefaultArgExprClass:
-    mangleExpression(cast<CXXDefaultArgExpr>(E)->getExpr(), Arity);
-    break;
+    E = cast<CXXDefaultArgExpr>(E)->getExpr();
+    goto recurse;
 
   case Expr::CXXDefaultInitExprClass:
-    mangleExpression(cast<CXXDefaultInitExpr>(E)->getExpr(), Arity);
-    break;
+    E = cast<CXXDefaultInitExpr>(E)->getExpr();
+    goto recurse;
 
   case Expr::CXXStdInitializerListExprClass:
-    mangleExpression(cast<CXXStdInitializerListExpr>(E)->getSubExpr(), Arity);
-    break;
+    E = cast<CXXStdInitializerListExpr>(E)->getSubExpr();
+    goto recurse;
 
   case Expr::SubstNonTypeTemplateParmExprClass:
-    mangleExpression(cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement(),
-                     Arity);
-    break;
+    E = cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement();
+    goto recurse;
 
   case Expr::UserDefinedLiteralClass:
     // We follow g++'s approach of mangling a UDL as a call to the literal
     // operator.
   case Expr::CXXMemberCallExprClass: // fallthrough
   case Expr::CallExprClass: {
+    NotPrimaryExpr();
     const CallExpr *CE = cast<CallExpr>(E);
 
     // <expression> ::= cp <simple-id> <expression>* E
@@ -4128,6 +4154,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXNewExprClass: {
+    NotPrimaryExpr();
     const CXXNewExpr *New = cast<CXXNewExpr>(E);
     if (New->isGlobalNew()) Out << "gs";
     Out << (New->isArray() ? "na" : "nw");
@@ -4163,6 +4190,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXPseudoDestructorExprClass: {
+    NotPrimaryExpr();
     const auto *PDE = cast<CXXPseudoDestructorExpr>(E);
     if (const Expr *Base = PDE->getBase())
       mangleMemberExprBase(Base, PDE->isArrow());
@@ -4189,6 +4217,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::MemberExprClass: {
+    NotPrimaryExpr();
     const MemberExpr *ME = cast<MemberExpr>(E);
     mangleMemberExpr(ME->getBase(), ME->isArrow(),
                      ME->getQualifier(), nullptr,
@@ -4199,6 +4228,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::UnresolvedMemberExprClass: {
+    NotPrimaryExpr();
     const UnresolvedMemberExpr *ME = cast<UnresolvedMemberExpr>(E);
     mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
                      ME->isArrow(), ME->getQualifier(), nullptr,
@@ -4209,6 +4239,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXDependentScopeMemberExprClass: {
+    NotPrimaryExpr();
     const CXXDependentScopeMemberExpr *ME
       = cast<CXXDependentScopeMemberExpr>(E);
     mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
@@ -4221,6 +4252,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::UnresolvedLookupExprClass: {
+    NotPrimaryExpr();
     const UnresolvedLookupExpr *ULE = cast<UnresolvedLookupExpr>(E);
     mangleUnresolvedName(ULE->getQualifier(), ULE->getName(),
                          ULE->getTemplateArgs(), ULE->getNumTemplateArgs(),
@@ -4229,6 +4261,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXUnresolvedConstructExprClass: {
+    NotPrimaryExpr();
     const CXXUnresolvedConstructExpr *CE = cast<CXXUnresolvedConstructExpr>(E);
     unsigned N = CE->getNumArgs();
 
@@ -4239,7 +4272,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
       mangleType(CE->getType());
       mangleInitListElements(IL);
       Out << "E";
-      return;
+      break;
     }
 
     Out << "cv";
@@ -4251,14 +4284,17 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXConstructExprClass: {
+    // An implicit construction is not mangled, so may wrap an <expr-primary>.
     const auto *CE = cast<CXXConstructExpr>(E);
     if (!CE->isListInitialization() || CE->isStdInitListInitialization()) {
       assert(
           CE->getNumArgs() >= 1 &&
           (CE->getNumArgs() == 1 || isa<CXXDefaultArgExpr>(CE->getArg(1))) &&
           "implicit CXXConstructExpr must have one argument");
-      return mangleExpression(cast<CXXConstructExpr>(E)->getArg(0));
+      E = cast<CXXConstructExpr>(E)->getArg(0);
+      goto recurse;
     }
+    NotPrimaryExpr();
     Out << "il";
     for (auto *E : CE->arguments())
       mangleExpression(E);
@@ -4267,6 +4303,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXTemporaryObjectExprClass: {
+    NotPrimaryExpr();
     const auto *CE = cast<CXXTemporaryObjectExpr>(E);
     unsigned N = CE->getNumArgs();
     bool List = CE->isListInitialization();
@@ -4296,17 +4333,20 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXScalarValueInitExprClass:
+    NotPrimaryExpr();
     Out << "cv";
     mangleType(E->getType());
     Out << "_E";
     break;
 
   case Expr::CXXNoexceptExprClass:
+    NotPrimaryExpr();
     Out << "nx";
     mangleExpression(cast<CXXNoexceptExpr>(E)->getOperand());
     break;
 
   case Expr::UnaryExprOrTypeTraitExprClass: {
+    // Non-instantiation-dependent traits are an <expr-primary> integer literal.
     const UnaryExprOrTypeTraitExpr *SAE = cast<UnaryExprOrTypeTraitExpr>(E);
 
     if (!SAE->isInstantiationDependent()) {
@@ -4326,6 +4366,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
       break;
     }
 
+    NotPrimaryExpr(); // But otherwise, they are not.
+
     auto MangleAlignofSizeofArg = [&] {
       if (SAE->isArgumentType()) {
         Out << 't';
@@ -4380,6 +4422,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXThrowExprClass: {
+    NotPrimaryExpr();
     const CXXThrowExpr *TE = cast<CXXThrowExpr>(E);
     //  <expression> ::= tw <expression>  # throw expression
     //               ::= tr               # rethrow
@@ -4393,6 +4436,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXTypeidExprClass: {
+    NotPrimaryExpr();
     const CXXTypeidExpr *TIE = cast<CXXTypeidExpr>(E);
     //  <expression> ::= ti <type>        # typeid (type)
     //               ::= te <expression>  # typeid (expression)
@@ -4407,6 +4451,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXDeleteExprClass: {
+    NotPrimaryExpr();
     const CXXDeleteExpr *DE = cast<CXXDeleteExpr>(E);
     //  <expression> ::= [gs] dl <expression>  # [::] delete expr
     //               ::= [gs] da <expression>  # [::] delete [] expr
@@ -4417,6 +4462,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::UnaryOperatorClass: {
+    NotPrimaryExpr();
     const UnaryOperator *UO = cast<UnaryOperator>(E);
     mangleOperatorName(UnaryOperator::getOverloadedOperator(UO->getOpcode()),
                        /*Arity=*/1);
@@ -4425,6 +4471,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::ArraySubscriptExprClass: {
+    NotPrimaryExpr();
     const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(E);
 
     // Array subscript is treated as a syntactically weird form of
@@ -4436,6 +4483,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::MatrixSubscriptExprClass: {
+    NotPrimaryExpr();
     const MatrixSubscriptExpr *ME = cast<MatrixSubscriptExpr>(E);
     Out << "ixix";
     mangleExpression(ME->getBase());
@@ -4446,6 +4494,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
 
   case Expr::CompoundAssignOperatorClass: // fallthrough
   case Expr::BinaryOperatorClass: {
+    NotPrimaryExpr();
     const BinaryOperator *BO = cast<BinaryOperator>(E);
     if (BO->getOpcode() == BO_PtrMemD)
       Out << "ds";
@@ -4458,6 +4507,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXRewrittenBinaryOperatorClass: {
+    NotPrimaryExpr();
     // The mangled form represents the original syntax.
     CXXRewrittenBinaryOperator::DecomposedForm Decomposed =
         cast<CXXRewrittenBinaryOperator>(E)->getDecomposedForm();
@@ -4469,6 +4519,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::ConditionalOperatorClass: {
+    NotPrimaryExpr();
     const ConditionalOperator *CO = cast<ConditionalOperator>(E);
     mangleOperatorName(OO_Conditional, /*Arity=*/3);
     mangleExpression(CO->getCond());
@@ -4484,19 +4535,22 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::ObjCBridgedCastExprClass: {
+    NotPrimaryExpr();
     // Mangle ownership casts as a vendor extended operator __bridge,
     // __bridge_transfer, or __bridge_retain.
     StringRef Kind = cast<ObjCBridgedCastExpr>(E)->getBridgeKindName();
     Out << "v1U" << Kind.size() << Kind;
+    mangleCastExpression(E, "cv");
+    break;
   }
-  // Fall through to mangle the cast itself.
-  LLVM_FALLTHROUGH;
 
   case Expr::CStyleCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "cv");
     break;
 
   case Expr::CXXFunctionalCastExprClass: {
+    NotPrimaryExpr();
     auto *Sub = cast<ExplicitCastExpr>(E)->getSubExpr()->IgnoreImplicit();
     // FIXME: Add isImplicit to CXXConstructExpr.
     if (auto *CCE = dyn_cast<CXXConstructExpr>(Sub))
@@ -4516,22 +4570,28 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXStaticCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "sc");
     break;
   case Expr::CXXDynamicCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "dc");
     break;
   case Expr::CXXReinterpretCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "rc");
     break;
   case Expr::CXXConstCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "cc");
     break;
   case Expr::CXXAddrspaceCastExprClass:
+    NotPrimaryExpr();
     mangleCastExpression(E, "ac");
     break;
 
   case Expr::CXXOperatorCallExprClass: {
+    NotPrimaryExpr();
     const CXXOperatorCallExpr *CE = cast<CXXOperatorCallExpr>(E);
     unsigned NumArgs = CE->getNumArgs();
     // A CXXOperatorCallExpr for OO_Arrow models only semantics, not syntax
@@ -4545,9 +4605,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::ParenExprClass:
-    mangleExpression(cast<ParenExpr>(E)->getSubExpr(), Arity);
-    break;
-
+    E = cast<ParenExpr>(E)->getSubExpr();
+    goto recurse;
 
   case Expr::ConceptSpecializationExprClass: {
     //  <expr-primary> ::= L <mangled-name> E # external name
@@ -4561,10 +4620,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::DeclRefExprClass:
-    mangleDeclRefExpr(cast<DeclRefExpr>(E)->getDecl());
+    // The MangleDeclRefExpr helper calls NotPrimaryExpr() itself where needed.
+    MangleDeclRefExpr(cast<DeclRefExpr>(E)->getDecl());
     break;
 
   case Expr::SubstNonTypeTemplateParmPackExprClass:
+    NotPrimaryExpr();
     // FIXME: not clear how to mangle this!
     // template <unsigned N...> class A {
     //   template <class U...> void foo(U (&x)[N]...);
@@ -4573,14 +4634,16 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
     break;
 
   case Expr::FunctionParmPackExprClass: {
+    NotPrimaryExpr();
     // FIXME: not clear how to mangle this!
     const FunctionParmPackExpr *FPPE = cast<FunctionParmPackExpr>(E);
     Out << "v110_SUBSTPACK";
-    mangleDeclRefExpr(FPPE->getParameterPack());
+    MangleDeclRefExpr(FPPE->getParameterPack());
     break;
   }
 
   case Expr::DependentScopeDeclRefExprClass: {
+    NotPrimaryExpr();
     const DependentScopeDeclRefExpr *DRE = cast<DependentScopeDeclRefExpr>(E);
     mangleUnresolvedName(DRE->getQualifier(), DRE->getDeclName(),
                          DRE->getTemplateArgs(), DRE->getNumTemplateArgs(),
@@ -4589,24 +4652,27 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXBindTemporaryExprClass:
-    mangleExpression(cast<CXXBindTemporaryExpr>(E)->getSubExpr());
-    break;
+    E = cast<CXXBindTemporaryExpr>(E)->getSubExpr();
+    goto recurse;
 
   case Expr::ExprWithCleanupsClass:
-    mangleExpression(cast<ExprWithCleanups>(E)->getSubExpr(), Arity);
-    break;
+    E = cast<ExprWithCleanups>(E)->getSubExpr();
+    goto recurse;
 
   case Expr::FloatingLiteralClass: {
+    // <expr-primary>
     const FloatingLiteral *FL = cast<FloatingLiteral>(E);
     mangleFloatLiteral(FL->getType(), FL->getValue());
     break;
   }
 
   case Expr::FixedPointLiteralClass:
+    // Currently unimplemented -- might be <expr-primary> in future?
     mangleFixedPointLiteral();
     break;
 
   case Expr::CharacterLiteralClass:
+    // <expr-primary>
     Out << 'L';
     mangleType(E->getType());
     Out << cast<CharacterLiteral>(E)->getValue();
@@ -4615,18 +4681,21 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
 
   // FIXME. __objc_yes/__objc_no are mangled same as true/false
   case Expr::ObjCBoolLiteralExprClass:
+    // <expr-primary>
     Out << "Lb";
     Out << (cast<ObjCBoolLiteralExpr>(E)->getValue() ? '1' : '0');
     Out << 'E';
     break;
 
   case Expr::CXXBoolLiteralExprClass:
+    // <expr-primary>
     Out << "Lb";
     Out << (cast<CXXBoolLiteralExpr>(E)->getValue() ? '1' : '0');
     Out << 'E';
     break;
 
   case Expr::IntegerLiteralClass: {
+    // <expr-primary>
     llvm::APSInt Value(cast<IntegerLiteral>(E)->getValue());
     if (E->getType()->isSignedIntegerType())
       Value.setIsSigned(true);
@@ -4635,6 +4704,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::ImaginaryLiteralClass: {
+    // <expr-primary>
     const ImaginaryLiteral *IE = cast<ImaginaryLiteral>(E);
     // Mangle as if a complex literal.
     // Proposal from David Vandevoorde, 2010.06.30.
@@ -4658,6 +4728,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::StringLiteralClass: {
+    // <expr-primary>
     // Revised proposal from David Vandervoorde, 2010.07.15.
     Out << 'L';
     assert(isa<ConstantArrayType>(E->getType()));
@@ -4667,21 +4738,25 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::GNUNullExprClass:
+    // <expr-primary>
     // Mangle as if an integer literal 0.
     mangleIntegerLiteral(E->getType(), llvm::APSInt(32));
     break;
 
   case Expr::CXXNullPtrLiteralExprClass: {
+    // <expr-primary>
     Out << "LDnE";
     break;
   }
 
   case Expr::PackExpansionExprClass:
+    NotPrimaryExpr();
     Out << "sp";
     mangleExpression(cast<PackExpansionExpr>(E)->getPattern());
     break;
 
   case Expr::SizeOfPackExprClass: {
+    NotPrimaryExpr();
     auto *SPE = cast<SizeOfPackExpr>(E);
     if (SPE->isPartiallySubstituted()) {
       Out << "sP";
@@ -4706,12 +4781,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
     break;
   }
 
-  case Expr::MaterializeTemporaryExprClass: {
-    mangleExpression(cast<MaterializeTemporaryExpr>(E)->getSubExpr());
-    break;
-  }
+  case Expr::MaterializeTemporaryExprClass:
+    E = cast<MaterializeTemporaryExpr>(E)->getSubExpr();
+    goto recurse;
 
   case Expr::CXXFoldExprClass: {
+    NotPrimaryExpr();
     auto *FE = cast<CXXFoldExpr>(E);
     if (FE->isLeftFold())
       Out << (FE->getInit() ? "fL" : "fl");
@@ -4733,27 +4808,34 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
   }
 
   case Expr::CXXThisExprClass:
+    NotPrimaryExpr();
     Out << "fpT";
     break;
 
   case Expr::CoawaitExprClass:
     // FIXME: Propose a non-vendor mangling.
+    NotPrimaryExpr();
     Out << "v18co_await";
     mangleExpression(cast<CoawaitExpr>(E)->getOperand());
     break;
 
   case Expr::DependentCoawaitExprClass:
     // FIXME: Propose a non-vendor mangling.
+    NotPrimaryExpr();
     Out << "v18co_await";
     mangleExpression(cast<DependentCoawaitExpr>(E)->getOperand());
     break;
 
   case Expr::CoyieldExprClass:
     // FIXME: Propose a non-vendor mangling.
+    NotPrimaryExpr();
     Out << "v18co_yield";
     mangleExpression(cast<CoawaitExpr>(E)->getOperand());
     break;
   }
+
+  if (AsTemplateArg && !IsPrimaryExpr)
+    Out << 'E';
 }
 
 /// Mangle an expression which refers to a parameter variable.
@@ -5003,10 +5085,9 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
     Out << "Dp";
     mangleType(A.getAsTemplateOrTemplatePattern());
     break;
-  case TemplateArgument::Expression: {
+  case TemplateArgument::Expression:
     mangleTemplateArgExpr(A.getAsExpr());
     break;
-  }
   case TemplateArgument::Integral:
     mangleIntegerLiteral(A.getIntegralType(), A.getAsIntegral());
     break;
@@ -5062,9 +5143,22 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
 }
 
 void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) {
-  // It's possible to end up with a DeclRefExpr here in certain
-  // dependent cases, in which case we should mangle as a
-  // declaration.
+  ASTContext &Ctx = Context.getASTContext();
+  if (Ctx.getLangOpts().getClangABICompat() > LangOptions::ClangABI::Ver11) {
+    mangleExpression(E, UnknownArity, /*AsTemplateArg=*/true);
+    return;
+  }
+
+  // Prior to Clang 12, we didn't omit the X .. E around <expr-primary>
+  // correctly in cases where the template argument was
+  // constructed from an expression rather than an already-evaluated
+  // literal. In such a case, we would then e.g. emit 'XLi0EE' instead of
+  // 'Li0E'.
+  //
+  // We did special-case DeclRefExpr to try to do the right thing for that
+  // one expression kind, but in doing so unfortunately handled ParmVarDecl
+  // (a subclass of VarDecl) _incorrectly_, emitting 'L_Z .. E' instead of
+  // the proper 'Xfp_E'.
   E = E->IgnoreParenImpCasts();
   if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
     const ValueDecl *D = DRE->getDecl();
@@ -5075,7 +5169,6 @@ void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) {
       return;
     }
   }
-
   Out << 'X';
   mangleExpression(E);
   Out << 'E';
diff --git a/clang/test/CodeGenCXX/clang-abi-compat.cpp b/clang/test/CodeGenCXX/clang-abi-compat.cpp
index 46e7ed812cbc..caf06bd5f9f6 100644
--- a/clang/test/CodeGenCXX/clang-abi-compat.cpp
+++ b/clang/test/CodeGenCXX/clang-abi-compat.cpp
@@ -1,12 +1,23 @@
-// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fclang-abi-compat=3.0 %s -emit-llvm -o -    | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
-// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.0 %s -emit-llvm -o -    | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
-// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.8 %s -emit-llvm -o -    | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
-// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=3.9 %s -emit-llvm -o -    | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s
-// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=4.0 %s -emit-llvm -o -    | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s
-// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=5 %s -emit-llvm -o -      | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s
-// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fclang-abi-compat=11 %s -emit-llvm -o -     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s
-// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fclang-abi-compat=latest %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,V12 %s
-// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fclang-abi-compat=latest %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17 %s
+// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s -emit-llvm -o - -Wno-c++11-extensions \
+// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.0 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.8 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,PRE39,PRE5,PRE12 %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=3.9 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=4.0 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,PRE5,PRE12 %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=5 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17 %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=11 %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,PRE12,PRE12-CXX17,PRE12-CXX20 %s
+// RUN: %clang_cc1 -std=c++98 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - -Wno-c++11-extensions \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12 %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-linux-gnu -fenable-matrix -fclang-abi-compat=latest %s -emit-llvm -o - \
+// RUN:     | FileCheck --check-prefixes=CHECK,V39,V5,V12,V12-CXX17,V12-CXX20 %s
 
 typedef __attribute__((vector_size(8))) long long v1xi64;
 void clang39(v1xi64) {}
@@ -55,3 +66,68 @@ template void clang12_b<arr>();
 // CHECK: @_Z9clang12_cIXadL_Z3arrEEEvv
 template<const char (*)[6]> void clang12_c() {}
 template void clang12_c<&arr>();
+
+
+/// Tests for <template-arg> <expr-primary> changes in clang12:
+namespace expr_primary {
+struct A {
+  template<int N> struct Int {};
+  template<int& N> struct Ref {};
+};
+
+/// Check various DeclRefExpr manglings
+
+// PRE12: @_ZN12expr_primary5test1INS_1AEEEvNT_3IntIXLi1EEEE
+// V12:   @_ZN12expr_primary5test1INS_1AEEEvNT_3IntILi1EEE
+template <typename T> void test1(typename T::template Int<1> a) {}
+template void test1<A>(typename A::template Int<1> a);
+
+enum Enum { EnumVal = 4 };
+int Global;
+
+// PRE12: @_ZN12expr_primary5test2INS_1AEEEvNT_3IntIXLNS_4EnumE4EEEE
+// V12:   @_ZN12expr_primary5test2INS_1AEEEvNT_3IntILNS_4EnumE4EEE
+template <typename T> void test2(typename T::template Int<EnumVal> a) {}
+template void test2<A>(typename A::template Int<4> a);
+
+// CHECK: @_ZN12expr_primary5test3ILi3EEEvNS_1A3IntIXT_EEE
+template <int X> void test3(typename A::template Int<X> a) {}
+template void test3<3>(A::Int<3> a);
+
+#if __cplusplus >= 202002L
+// CHECK-CXX20: @_ZN12expr_primary5test4INS_1AEEEvNT_3RefIL_ZNS_6GlobalEEEE
+template <typename T> void test4(typename T::template Ref<(Global)> a) {}
+template void test4<A>(typename A::template Ref<Global> a);
+
+struct B {
+  struct X {
+    constexpr X(double) {}
+    constexpr X(int&) {}
+  };
+  template<X> struct Y {};
+};
+
+// PRE12-CXX20: _ZN12expr_primary5test5INS_1BEEEvNT_1YIXLd3ff0000000000000EEEE
+// V12-CXX20: _ZN12expr_primary5test5INS_1BEEEvNT_1YILd3ff0000000000000EEE
+template<typename T> void test5(typename T::template Y<1.0>) { }
+template void test5<B>(typename B::Y<1.0>);
+
+// PRE12-CXX20: @_ZN12expr_primary5test6INS_1BEEENT_1YIL_ZZNS_5test6EiE1bEEEi
+// V12-CXX20:   @_ZN12expr_primary5test6INS_1BEEENT_1YIXfp_EEEi
+template<typename T> auto test6(int b) -> typename T::template Y<b> { return {}; }
+template auto test6<B>(int b) -> B::Y<b>;
+#endif
+
+/// Verify non-dependent type-traits within a dependent template arg.
+
+// PRE12: @_ZN12expr_primary5test7INS_1AEEEvNT_3IntIXLm1EEEE
+// V12:   @_ZN12expr_primary5test7INS_1AEEEvNT_3IntILm1EEE
+template <class T> void test7(typename T::template Int<sizeof(char)> a) {}
+template void test7<A>(A::Int<1>);
+
+// PRE12: @_ZN12expr_primary5test8ILi2EEEvu11matrix_typeIXLi1EEXT_EiE
+// V12:   @_ZN12expr_primary5test8ILi2EEEvu11matrix_typeILi1EXT_EiE
+template<int N> using matrix1xN = int __attribute__((matrix_type(1, N)));
+template<int N> void test8(matrix1xN<N> a) {}
+template void test8<2>(matrix1xN<2> a);
+}
diff --git a/clang/test/CodeGenCXX/mangle-abi-tag.cpp b/clang/test/CodeGenCXX/mangle-abi-tag.cpp
index 5d84096d24cd..9e26604a2c44 100644
--- a/clang/test/CodeGenCXX/mangle-abi-tag.cpp
+++ b/clang/test/CodeGenCXX/mangle-abi-tag.cpp
@@ -225,7 +225,7 @@ namespace pr30440 {
 template<class F> void g(F);
 template<class ...A> auto h(A ...a)->decltype (g (0, g < a > (a) ...)) {
 }
-// CHECK-DAG: define {{.*}} @_ZN7pr304401hIJEEEDTcl1gLi0Espcl1gIL_ZZNS_1hEDpT_E1aEEfp_EEES2_(
+// CHECK-DAG: define {{.*}} @_ZN7pr304401hIJEEEDTcl1gLi0Espcl1gIXfp_EEfp_EEEDpT_(
 
 void pr30440_test () {
   h();
diff --git a/clang/test/CodeGenCXX/mangle-concept.cpp b/clang/test/CodeGenCXX/mangle-concept.cpp
index b0fcd586727e..e60e6348a5f6 100644
--- a/clang/test/CodeGenCXX/mangle-concept.cpp
+++ b/clang/test/CodeGenCXX/mangle-concept.cpp
@@ -6,11 +6,11 @@ template <bool> struct S {};
 template <typename> concept C = true;
 template <typename T = int> S<C<T>> f0() { return S<C<T>>{}; }
 template S<C<int>> f0<>();
-// CHECK: @_ZN5test12f0IiEENS_1SIXL_ZNS_1CIT_EEEEEEv(
+// CHECK: @_ZN5test12f0IiEENS_1SIL_ZNS_1CIT_EEEEEv(
 }
 
 template <bool> struct S {};
 template <typename> concept C = true;
 template <typename T = int> S<C<T>> f0() { return S<C<T>>{}; }
 template S<C<int>> f0<>();
-// CHECK: @_Z2f0IiE1SIXL_Z1CIT_EEEEv(
+// CHECK: @_Z2f0IiE1SIL_Z1CIT_EEEv(
diff --git a/clang/test/CodeGenCXX/mangle-template.cpp b/clang/test/CodeGenCXX/mangle-template.cpp
index 40688de7e12e..9b80a6d64695 100644
--- a/clang/test/CodeGenCXX/mangle-template.cpp
+++ b/clang/test/CodeGenCXX/mangle-template.cpp
@@ -270,7 +270,7 @@ namespace test17 {
   // Note: there is no J...E here, because we can't form a pack argument, and
   // the 5u and 6u are mangled with the original type 'j' (unsigned int) not
   // with the resolved type 'i' (signed int).
-  // CHECK: define {{.*}} @_ZN6test171hILi4EJLi1ELi2ELi3EEEEvNS_1XIXspT0_EXLj5EEXT_EXLj6EEEE
+  // CHECK: define {{.*}} @_ZN6test171hILi4EJLi1ELi2ELi3EEEEvNS_1XIXspT0_ELj5EXT_ELj6EEE
   template<int D, int ...C> void h(X<C..., 5u, D, 6u>) {}
   void i() { h<4, 1, 2, 3>({}); }
 
@@ -323,7 +323,7 @@ namespace partially_dependent_template_args {
     // callee is unresolved, the rest mangle the converted argument Lj0E
     // because the callee is resolved.
     void h() {
-      // CHECK: @_ZN33partially_dependent_template_args5test22g1INS0_1XEEEvDTcl1fIXLi0EEEcvT__EEE
+      // CHECK: @_ZN33partially_dependent_template_args5test22g1INS0_1XEEEvDTcl1fILi0EEcvT__EEE
       g1<X>({});
       // CHECK: @_ZN33partially_dependent_template_args5test22g2IiEEvDTplclL_ZNS0_1fILj0EEEiNS0_1XEEilEEcvT__EE
       g2<int>({});
diff --git a/clang/test/CodeGenCXX/mangle.cpp b/clang/test/CodeGenCXX/mangle.cpp
index f8ea9960a5c5..6cec33e3758e 100644
--- a/clang/test/CodeGenCXX/mangle.cpp
+++ b/clang/test/CodeGenCXX/mangle.cpp
@@ -1123,7 +1123,7 @@ namespace test56 {
 namespace test57 {
   struct X { template <int N> int f(); } x;
   template<int N> void f(decltype(x.f<0>() + N)) {}
-  // CHECK-LABEL: @_ZN6test571fILi0EEEvDTplcldtL_ZNS_1xEE1fIXLi0EEEET_E
+  // CHECK-LABEL: @_ZN6test571fILi0EEEvDTplcldtL_ZNS_1xEE1fILi0EEET_E
   template void f<0>(int);
 }
 
diff --git a/clang/test/CodeGenCXX/matrix-type.cpp b/clang/test/CodeGenCXX/matrix-type.cpp
index 9bde12e13b86..9e715e10ce1c 100644
--- a/clang/test/CodeGenCXX/matrix-type.cpp
+++ b/clang/test/CodeGenCXX/matrix-type.cpp
@@ -215,14 +215,14 @@ void test_template_deduction() {
   // CHECK-NEXT:    %m4 = alloca [144 x float], align 4
   // CHECK-NEXT:    %v = alloca %struct.selector.3, align 1
   // CHECK-NEXT:    %undef.agg.tmp4 = alloca %struct.selector.3, align 1
-  // CHECK-NEXT:    call void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeIXLm10EEXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m0)
+  // CHECK-NEXT:    call void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m0)
   // CHECK-NEXT:    call void @_Z10use_matrixIiE8selectorILi2EERu11matrix_typeILm10ELm10ET_E([100 x i32]* nonnull align 4 dereferenceable(400) %m1)
-  // CHECK-NEXT:    call void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_EXLm10EET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m2)
+  // CHECK-NEXT:    call void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m2)
   // CHECK-NEXT:    call void @_Z10use_matrixIiLm12ELm12EE8selectorILi0EERu11matrix_typeIXT0_EXT1_ET_E([144 x i32]* nonnull align 4 dereferenceable(576) %m3)
   // CHECK-NEXT:    call void @_Z10use_matrixILm12ELm12EE8selectorILi4EERu11matrix_typeIXT_EXT0_EfE([144 x float]* nonnull align 4 dereferenceable(576) %m4)
   // CHECK-NEXT:    ret void
 
-  // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeIXLm10EEXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m)
+  // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi3EERu11matrix_typeILm10EXT0_ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m)
   // CHECK-NEXT:  entry:
   // CHECK-NEXT:    %m.addr = alloca [120 x i32]*, align 8
   // CHECK-NEXT:    store [120 x i32]* %m, [120 x i32]** %m.addr, align 8
@@ -236,7 +236,7 @@ void test_template_deduction() {
   // CHECK-NEXT:    call void @llvm.trap()
   // CHECK-NEXT:    unreachable
 
-  // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_EXLm10EET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m)
+  // CHECK-LABEL: define linkonce_odr void @_Z10use_matrixIiLm12EE8selectorILi1EERu11matrix_typeIXT0_ELm10ET_E([120 x i32]* nonnull align 4 dereferenceable(480) %m)
   // CHECK-NEXT:  entry:
   // CHECK-NEXT:    %m.addr = alloca [120 x i32]*, align 8
   // CHECK-NEXT:    store [120 x i32]* %m, [120 x i32]** %m.addr, align 8
@@ -277,10 +277,10 @@ void test_auto_t() {
   // CHECK-LABEL: define{{.*}} void @_Z11test_auto_tv()
   // CHECK-NEXT:  entry:
   // CHECK-NEXT:    %m = alloca [130 x i32], align 4
-  // CHECK-NEXT:    call void @_Z3fooILm13EEvRu11matrix_typeIXT_EXLm10EEiE([130 x i32]* nonnull align 4 dereferenceable(520) %m)
+  // CHECK-NEXT:    call void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m)
   // CHECK-NEXT:    ret void
 
-  // CHECK-LABEL: define linkonce_odr void @_Z3fooILm13EEvRu11matrix_typeIXT_EXLm10EEiE([130 x i32]* nonnull align 4 dereferenceable(520) %m)
+  // CHECK-LABEL: define linkonce_odr void @_Z3fooILm13EEvRu11matrix_typeIXT_ELm10EiE([130 x i32]* nonnull align 4 dereferenceable(520) %m)
   // CHECK-NEXT:  entry:
   // CHECK-NEXT:    %m.addr = alloca [130 x i32]*, align 8
   // CHECK-NEXT:    store [130 x i32]* %m, [130 x i32]** %m.addr, align 8
@@ -326,7 +326,7 @@ void test_use_matrix_2() {
   // CHECK-NEXT:    store <40 x float> %call, <40 x float>* %0, align 4
   // CHECK-NEXT:    call void @_Z12use_matrix_2ILm2ELm12EE8selectorILi0EERu11matrix_typeIXplT_Li2EEXdvT0_Li2EEiERu11matrix_typeIXT_EXT0_EfE([24 x i32]* nonnull align 4 dereferenceable(96) %m1, [24 x float]* nonnull align 4 dereferenceable(96) %m2)
   // CHECK-NEXT:    call void @_Z12use_matrix_2ILm5ELm8EE8selectorILi1EERu11matrix_typeIXplT_T0_EXT0_EiERu11matrix_typeIXT_EXmiT0_T_EfE([104 x i32]* nonnull align 4 dereferenceable(416) %m3, [15 x float]* nonnull align 4 dereferenceable(60) %m4)
-  // CHECK-NEXT:    %call2 = call <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_EXLm10EEiE([50 x i32]* nonnull align 4 dereferenceable(200) %m5)
+  // CHECK-NEXT:    %call2 = call <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m5)
   // CHECK-NEXT:    %1 = bitcast [20 x float]* %r4 to <20 x float>*
   // CHECK-NEXT:    store <20 x float> %call2, <20 x float>* %1, align 4
   // CHECK-NEXT:    call void @_Z12use_matrix_3ILm6EE8selectorILi2EERu11matrix_typeIXmiT_Li2EEXT_EiE([24 x i32]* nonnull align 4 dereferenceable(96) %m1)
@@ -357,7 +357,7 @@ void test_use_matrix_2() {
   // CHECK-NEXT:    call void @llvm.trap()
   // CHECK-NEXT:    unreachable
 
-  // CHECK-LABEL: define linkonce_odr <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_EXLm10EEiE([50 x i32]* nonnull align 4 dereferenceable(200) %m1)
+  // CHECK-LABEL: define linkonce_odr <20 x float> @_Z12use_matrix_2ILm5EEu11matrix_typeIXplT_T_EXmiT_Li3EEfERu11matrix_typeIXT_ELm10EiE([50 x i32]* nonnull align 4 dereferenceable(200) %m1)
   // CHECK-NEXT:  entry:
   // CHECK-NEXT:    %m1.addr = alloca [50 x i32]*, align 8
   // CHECK-NEXT:    store [50 x i32]* %m1, [50 x i32]** %m1.addr, align 8
diff --git a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp
index 321f65cacc71..5c02b1eb014c 100644
--- a/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp
+++ b/clang/test/CodeGenCXX/microsoft-uuidof-mangling.cpp
@@ -60,14 +60,12 @@ int main(int argc, const char * argv[])
 // CHECK-V11: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE(
 // CHECK-V12: call void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE(
 // CHECK-V11: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE(
-// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE(
-//    TODO: the above mangling is wrong -- the X/E shouldn't be emitted:       ^                     ^
+// CHECK-V12: call void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofL_ZN9HasMember6memberEEEE(
 // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC1Ev
 // CHECK-V11: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidoftT_E(
 // CHECK-V12: define linkonce_odr void @_Z15test_uuidofTypeI10TestStructEvDTu8__uuidofT_EE(
 // CHECK-V11: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofzsrT_6memberE(
 // CHECK-V12: define linkonce_odr void @_Z15test_uuidofExprI9HasMemberEvDTu8__uuidofXsrT_6memberEEE(
 // CHECK-V11: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofzL_ZN9HasMember6memberEEE(
-// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofXL_ZN9HasMember6memberEEEEE(
-//    TODO: the above mangling is wrong -- the X/E shouldn't be emitted:                      ^                     ^
+// CHECK-V12: define linkonce_odr void @_Z16test_uuidofExpr2I10TestStructEvDTcmtlT_Eu8__uuidofL_ZN9HasMember6memberEEEE(
 // CHECK: define linkonce_odr void @_ZN8UUIDTestI10TestStructL_Z42_GUID_eafa1952_66f8_438b_8fba_af1bbae42191EEC2Ev

From 0b7b698fecd37415a635a586e5ca159ab0b8872f Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight@google.com>
Date: Sun, 24 Jan 2021 16:23:58 -0500
Subject: [PATCH 011/244] Itanium Mangling: In 'enable_if', omit X/E around
 <expr-primary>.

The Clang enable_if extension is mangled as an <extended-qualifier>,
which is supposed to contain <template-args>. However, we were
unconditionally emitting X/E around its arguments, neglecting the fact
that <expr-primary> should be emitted directly without the surrounding
X/E.

Differential Revision: https://reviews.llvm.org/D95488

(cherry picked from commit a7246ba02a8923f316419a62d836dbe1c0b437bd)
---
 clang/lib/AST/ItaniumMangle.cpp            | 14 +++++++--
 clang/test/CodeGen/enable_if.c             | 34 +++++++++++-----------
 clang/test/CodeGenCXX/clang-abi-compat.cpp |  5 ++++
 clang/test/CodeGenCXX/enable_if.cpp        |  2 +-
 4 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 54e2f361a9f1..4420f6a2c1c3 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -727,9 +727,17 @@ void CXXNameMangler::mangleFunctionEncodingBareType(const FunctionDecl *FD) {
       EnableIfAttr *EIA = dyn_cast<EnableIfAttr>(*I);
       if (!EIA)
         continue;
-      Out << 'X';
-      mangleExpression(EIA->getCond());
-      Out << 'E';
+      if (Context.getASTContext().getLangOpts().getClangABICompat() >
+          LangOptions::ClangABI::Ver11) {
+        mangleTemplateArgExpr(EIA->getCond());
+      } else {
+        // Prior to Clang 12, we hardcoded the X/E around enable-if's argument,
+        // even though <template-arg> should not include an X/E around
+        // <expr-primary>.
+        Out << 'X';
+        mangleExpression(EIA->getCond());
+        Out << 'E';
+      }
     }
     Out << 'E';
     FunctionTypeDepth.pop(Saved);
diff --git a/clang/test/CodeGen/enable_if.c b/clang/test/CodeGen/enable_if.c
index 14550b9e2db9..327a201cdeba 100644
--- a/clang/test/CodeGen/enable_if.c
+++ b/clang/test/CodeGen/enable_if.c
@@ -31,22 +31,22 @@ void bar(int m) __attribute__((overloadable, enable_if(m > 0, "")));
 void bar(int m) __attribute__((overloadable, enable_if(1, "")));
 // CHECK-LABEL: define{{.*}} void @test2
 void test2() {
-  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi
   void (*p)(int) = bar;
-  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi
   void (*p2)(int) = &bar;
-  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi
   p = bar;
-  // CHECK: store void (i32)* @_Z3barUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3barUa9enable_ifILi1EEi
   p = &bar;
 
-  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*)
   void *vp1 = (void*)&bar;
-  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*)
   void *vp2 = (void*)bar;
-  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*)
   vp1 = (void*)&bar;
-  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifIXLi1EEEi to i8*)
+  // CHECK: store i8* bitcast (void (i32)* @_Z3barUa9enable_ifILi1EEi to i8*)
   vp1 = (void*)bar;
 }
 
@@ -54,13 +54,13 @@ void baz(int m) __attribute__((overloadable, enable_if(1, "")));
 void baz(int m) __attribute__((overloadable));
 // CHECK-LABEL: define{{.*}} void @test3
 void test3() {
-  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi
   void (*p)(int) = baz;
-  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi
   void (*p2)(int) = &baz;
-  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi
   p = baz;
-  // CHECK: store void (i32)* @_Z3bazUa9enable_ifIXLi1EEEi
+  // CHECK: store void (i32)* @_Z3bazUa9enable_ifILi1EEi
   p = &baz;
 }
 
@@ -71,13 +71,13 @@ void qux(int m) __attribute__((overloadable, enable_if(1, ""),
 void qux(int m) __attribute__((overloadable, enable_if(1, "")));
 // CHECK-LABEL: define{{.*}} void @test4
 void test4() {
-  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi
   void (*p)(int) = qux;
-  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi
   void (*p2)(int) = &qux;
-  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi
   p = qux;
-  // CHECK: store void (i32)* @_Z3quxUa9enable_ifIXLi1EEXLi1EEEi
+  // CHECK: store void (i32)* @_Z3quxUa9enable_ifILi1ELi1EEi
   p = &qux;
 }
 
@@ -90,6 +90,6 @@ void test5() {
   int foo(char *i __attribute__((pass_object_size(0))))
       __attribute__((enable_if(1, ""), overloadable));
 
-  // CHECK: call i32 @_Z3fooUa9enable_ifIXLi1EEEPcU17pass_object_size0
+  // CHECK: call i32 @_Z3fooUa9enable_ifILi1EEPcU17pass_object_size0
   foo((void*)0);
 }
diff --git a/clang/test/CodeGenCXX/clang-abi-compat.cpp b/clang/test/CodeGenCXX/clang-abi-compat.cpp
index caf06bd5f9f6..80311aa320fe 100644
--- a/clang/test/CodeGenCXX/clang-abi-compat.cpp
+++ b/clang/test/CodeGenCXX/clang-abi-compat.cpp
@@ -130,4 +130,9 @@ template void test7<A>(A::Int<1>);
 template<int N> using matrix1xN = int __attribute__((matrix_type(1, N)));
 template<int N> void test8(matrix1xN<N> a) {}
 template void test8<2>(matrix1xN<2> a);
+
+// PRE12: @_ZN12expr_primary5test9EUa9enable_ifIXLi1EEEv
+// V12:   @_ZN12expr_primary5test9EUa9enable_ifILi1EEv
+void test9(void) __attribute__((enable_if(1, ""))) {}
+
 }
diff --git a/clang/test/CodeGenCXX/enable_if.cpp b/clang/test/CodeGenCXX/enable_if.cpp
index 4e7707aaeed9..70386b87fcee 100644
--- a/clang/test/CodeGenCXX/enable_if.cpp
+++ b/clang/test/CodeGenCXX/enable_if.cpp
@@ -5,7 +5,7 @@ int test5(int);
 template <typename T>
 T test5(T) __attribute__((enable_if(1, "better than non-template")));
 
-// CHECK: @_Z5test5IiEUa9enable_ifIXLi1EEET_S0_
+// CHECK: @_Z5test5IiEUa9enable_ifILi1EET_S0_
 int (*Ptr)(int) = &test5;
 
 // Test itanium mangling for attribute enable_if

From de3396d89d998769c3310c23bdd49babade9d874 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Thu, 28 Jan 2021 15:30:21 -0800
Subject: [PATCH 012/244] workflows: Update branch names

Also remove the main-branch-sync workflow, which was removed from the main branch.
---
 .github/workflows/clang-tests.yml      |  6 +++---
 .github/workflows/libclc-tests.yml     |  6 +++---
 .github/workflows/lld-tests.yml        |  6 +++---
 .github/workflows/lldb-tests.yml       |  6 +++---
 .github/workflows/llvm-tests.yml       | 10 +++++-----
 .github/workflows/main-branch-sync.yml | 25 -------------------------
 6 files changed, 17 insertions(+), 42 deletions(-)
 delete mode 100644 .github/workflows/main-branch-sync.yml

diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml
index f8ca65e10726..af0b5eabeeda 100644
--- a/.github/workflows/clang-tests.yml
+++ b/.github/workflows/clang-tests.yml
@@ -28,16 +28,16 @@ jobs:
     steps:
     - name: Setup Windows
       if: startsWith(matrix.os, 'windows')
-      uses: llvm/actions/setup-windows@master
+      uses: llvm/actions/setup-windows@main
       with:
         arch: amd64
     - name: Install Ninja
-      uses: llvm/actions/install-ninja@master
+      uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
         fetch-depth: 1
     - name: Test clang
-      uses: llvm/actions/build-test-llvm-project@master
+      uses: llvm/actions/build-test-llvm-project@main
       with:
         cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release
         build_target: check-clang
diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml
index 4e8639b1c89a..2f1eb2939ea2 100644
--- a/.github/workflows/libclc-tests.yml
+++ b/.github/workflows/libclc-tests.yml
@@ -31,16 +31,16 @@ jobs:
     steps:
     - name: Setup Windows
       if: startsWith(matrix.os, 'windows')
-      uses: llvm/actions/setup-windows@master
+      uses: llvm/actions/setup-windows@main
       with:
         arch: amd64
     - name: Install Ninja
-      uses: llvm/actions/install-ninja@master
+      uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
         fetch-depth: 1
     - name: Build clang
-      uses: llvm/actions/build-test-llvm-project@master
+      uses: llvm/actions/build-test-llvm-project@main
       with:
         cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release
         build_target: ""
diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml
index 9b4cbe95f231..bdf0c2fcd886 100644
--- a/.github/workflows/lld-tests.yml
+++ b/.github/workflows/lld-tests.yml
@@ -28,16 +28,16 @@ jobs:
     steps:
     - name: Setup Windows
       if: startsWith(matrix.os, 'windows')
-      uses: llvm/actions/setup-windows@master
+      uses: llvm/actions/setup-windows@main
       with:
         arch: amd64
     - name: Install Ninja
-      uses: llvm/actions/install-ninja@master
+      uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
         fetch-depth: 1
     - name: Test lld
-      uses: llvm/actions/build-test-llvm-project@master
+      uses: llvm/actions/build-test-llvm-project@main
       with:
         cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="lld" -DCMAKE_BUILD_TYPE=Release
         build_target: check-lld
diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml
index 229e6deece6e..93fddc2de8c6 100644
--- a/.github/workflows/lldb-tests.yml
+++ b/.github/workflows/lldb-tests.yml
@@ -31,16 +31,16 @@ jobs:
     steps:
     - name: Setup Windows
       if: startsWith(matrix.os, 'windows')
-      uses: llvm/actions/setup-windows@master
+      uses: llvm/actions/setup-windows@main
       with:
         arch: amd64
     - name: Install Ninja
-      uses: llvm/actions/install-ninja@master
+      uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
         fetch-depth: 1
     - name: Build lldb
-      uses: llvm/actions/build-test-llvm-project@master
+      uses: llvm/actions/build-test-llvm-project@main
       with:
         # Mac OS requries that libcxx is enabled for lldb tests, so we need  to disable them.
         cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="clang;lldb" -DCMAKE_BUILD_TYPE=Release -DLLDB_INCLUDE_TESTS=OFF
diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
index 67f318ad849f..675383407d64 100644
--- a/.github/workflows/llvm-tests.yml
+++ b/.github/workflows/llvm-tests.yml
@@ -29,16 +29,16 @@ jobs:
     steps:
     - name: Setup Windows
       if: startsWith(matrix.os, 'windows')
-      uses: llvm/actions/setup-windows@master
+      uses: llvm/actions/setup-windows@main
       with:
         arch: amd64
     - name: Install Ninja
-      uses: llvm/actions/install-ninja@master
+      uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
         fetch-depth: 1
     - name: Test llvm
-      uses: llvm/actions/build-test-llvm-project@master
+      uses: llvm/actions/build-test-llvm-project@main
       with:
         cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release
 
@@ -60,7 +60,7 @@ jobs:
             repo: ${{ github.repository }}
     steps:
     - name: Install Ninja
-      uses: llvm/actions/install-ninja@master
+      uses: llvm/actions/install-ninja@main
     - name: Install abi-compliance-checker
       run: |
         sudo apt-get install abi-dumper autoconf pkg-config
@@ -72,7 +72,7 @@ jobs:
         ./configure
         sudo make install
     - name: Download source code
-      uses: llvm/actions/get-llvm-project-src@master
+      uses: llvm/actions/get-llvm-project-src@main
       with:
         ref: ${{ matrix.ref }}
         repo: ${{ matrix.repo }}
diff --git a/.github/workflows/main-branch-sync.yml b/.github/workflows/main-branch-sync.yml
deleted file mode 100644
index 5ea360e281d6..000000000000
--- a/.github/workflows/main-branch-sync.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: main branch sync
-
-on:
-  push:
-    branches:
-      - 'main'
-
-jobs:
-  branch_sync:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout Code
-        uses: actions/checkout@v2
-        with:
-          # persist-credentials: false allows us to use our own credentials for
-          # pushing to the repository.  Otherwise, the default github actions token
-          # is used.
-          persist-credentials: false
-          fetch-depth: 0
-
-      - name: Update branch
-        env:
-          LLVMBOT_TOKEN: ${{ secrets.LLVMBOT_MAIN_SYNC }}
-        run: |
-          git push https://$LLVMBOT_TOKEN@github.com/${{ github.repository }} HEAD:master

From 0a32d93bd95b7ad0a4c7f91955c6c815150df84c Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej@gmail.com>
Date: Wed, 27 Jan 2021 09:14:22 +0100
Subject: [PATCH 013/244] [clang-format] Avoid considering include directive as
 a template closer.

This fixes a bug [[ http://llvm.org/PR48891 | PR48891 ]] introduced in D93839 where:
```
#include <stdint.h>
namespace rep {}
```
got formatted as
```
#include <stdint.h>
namespace rep {
}
```

Reviewed By: MyDeveloperDay, leonardchan

Differential Revision: https://reviews.llvm.org/D95479

(cherry picked from commit e3713f156b8cb65a2b74f150afb824ce1e2a2fab)
---
 clang/lib/Format/UnwrappedLineFormatter.cpp |  2 +-
 clang/unittests/Format/FormatTest.cpp       | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index d1138bbc9c36..5dd0ccdfa6fd 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -371,7 +371,7 @@ class LineJoiner {
         if (Previous->is(tok::comment))
           Previous = Previous->getPreviousNonComment();
         if (Previous) {
-          if (Previous->is(tok::greater))
+          if (Previous->is(tok::greater) && !I[-1]->InPPDirective)
             return 0;
           if (Previous->is(tok::identifier)) {
             const FormatToken *PreviousPrevious =
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 855cf0242fe9..c1f88b9ae17a 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -10248,6 +10248,21 @@ TEST_F(FormatTest, SplitEmptyClass) {
                "{\n"
                "};",
                Style);
+
+  verifyFormat("#include \"stdint.h\"\n"
+               "namespace rep {}",
+               Style);
+  verifyFormat("#include <stdint.h>\n"
+               "namespace rep {}",
+               Style);
+  verifyFormat("#include <stdint.h>\n"
+               "namespace rep {}",
+               "#include <stdint.h>\n"
+               "namespace rep {\n"
+               "\n"
+               "\n"
+               "}",
+               Style);
 }
 
 TEST_F(FormatTest, SplitEmptyStruct) {

From 8c5d184ef714dcf435784e21e66b4b5e25b2dffb Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Wed, 27 Jan 2021 16:51:27 -0500
Subject: [PATCH 014/244] clang: Fix static_assert in a few contexts in
 microsoft mode

Follow-up to D17444. Fixes PR48904. See bug for details.

Differential Revision: https://reviews.llvm.org/D95559

(cherry picked from commit 764a7a2155c6747ec8d0b38d8edbb65960eae874)
---
 clang/lib/Parse/ParseDecl.cpp   |  3 ++-
 clang/test/Sema/static-assert.c | 11 +++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 571164139630..347d992b1643 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -4216,7 +4216,7 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc,
     }
 
     // Parse _Static_assert declaration.
-    if (Tok.is(tok::kw__Static_assert)) {
+    if (Tok.isOneOf(tok::kw__Static_assert, tok::kw_static_assert)) {
       SourceLocation DeclEnd;
       ParseStaticAssertDeclaration(DeclEnd);
       continue;
@@ -5180,6 +5180,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
   case tok::kw_friend:
 
     // static_assert-declaration
+  case tok::kw_static_assert:
   case tok::kw__Static_assert:
 
     // GNU typeof support.
diff --git a/clang/test/Sema/static-assert.c b/clang/test/Sema/static-assert.c
index f08e557fc8ea..9105f2366985 100644
--- a/clang/test/Sema/static-assert.c
+++ b/clang/test/Sema/static-assert.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c11 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fms-compatibility -DMS -fsyntax-only -verify %s
 // RUN: %clang_cc1 -std=c99 -pedantic -fsyntax-only -verify=expected,ext %s
 // RUN: %clang_cc1 -xc++ -std=c++11 -pedantic -fsyntax-only -verify=expected,ext,cxx %s
 
@@ -11,10 +12,17 @@ _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 ex
 _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \
                                    // ext-warning {{'_Static_assert' is a C11 extension}}
 
+#ifdef MS
+static_assert(1, "1 is nonzero");
+#endif
+
 void foo(void) {
   _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 extension}}
   _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \
                                      // ext-warning {{'_Static_assert' is a C11 extension}}
+#ifdef MS
+  static_assert(1, "1 is nonzero");
+#endif
 }
 
 _Static_assert(1, invalid); // expected-error {{expected string literal for diagnostic message in static_assert}} \
@@ -25,6 +33,9 @@ struct A {
   _Static_assert(1, "1 is nonzero"); // ext-warning {{'_Static_assert' is a C11 extension}}
   _Static_assert(0, "0 is nonzero"); // expected-error {{static_assert failed "0 is nonzero"}} \
                                      // ext-warning {{'_Static_assert' is a C11 extension}}
+#ifdef MS
+  static_assert(1, "1 is nonzero");
+#endif
 };
 
 #ifdef __cplusplus

From 1edbbf9d20d9f859f7ff2a146a501aeb1423141e Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Wed, 20 Jan 2021 12:38:32 +0100
Subject: [PATCH 015/244] [clangd] Log warning when using legacy (theia)
 semantic highlighting.

The legacy protocol will be removed on trunk after the 12 branch cut,
and gone in clangd 13.

Differential Revision: https://reviews.llvm.org/D95031

(cherry picked from commit 29472bb76915c4929aecc938300f6df31f63ac29)
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index dc89ebd59fe2..35aed2166f03 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -510,6 +510,11 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params,
         "semanticTokens request, choosing the latter (no notifications).");
     Opts.TheiaSemanticHighlighting = false;
   }
+  if (Opts.TheiaSemanticHighlighting) {
+    log("Using legacy semanticHighlights notification, which will be removed "
+        "in clangd 13. Clients should use the standard semanticTokens "
+        "request instead.");
+  }
 
   if (Params.rootUri && *Params.rootUri)
     Opts.WorkspaceRoot = std::string(Params.rootUri->file());

From 61e05d1bc1af737c5f24fd5cd765f1a9914cbd13 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Mon, 25 Jan 2021 16:16:22 +0100
Subject: [PATCH 016/244] [clangd] Parse Diagnostics block, and nest ClangTidy
 block under it.

(ClangTidy configuration block hasn't been in any release, so we should be OK
to move it around like this)

Differential Revision: https://reviews.llvm.org/D95362

(cherry picked from commit c3df9d58c75e0f89ca95e947804d65e79a491adc)
---
 clang-tools-extra/clangd/Config.h             | 15 +++---
 clang-tools-extra/clangd/ConfigCompile.cpp    | 14 +++---
 clang-tools-extra/clangd/ConfigFragment.h     | 47 +++++++++----------
 clang-tools-extra/clangd/ConfigYAML.cpp       | 14 +++++-
 clang-tools-extra/clangd/TidyProvider.cpp     |  2 +-
 .../clangd/unittests/ConfigCompileTests.cpp   | 32 +++++++------
 .../clangd/unittests/ConfigYAMLTests.cpp      | 11 +++--
 7 files changed, 75 insertions(+), 60 deletions(-)

diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h
index 44ca283b6a0e..391632cb086a 100644
--- a/clang-tools-extra/clangd/Config.h
+++ b/clang-tools-extra/clangd/Config.h
@@ -90,6 +90,13 @@ struct Config {
   struct {
     bool SuppressAll = false;
     llvm::StringSet<> Suppress;
+
+    /// Configures what clang-tidy checks to run and options to use with them.
+    struct {
+      // Comma-separated list of globs specifying the clang-tidy checks to run.
+      std::string Checks;
+      llvm::StringMap<std::string> CheckOptions;
+    } ClangTidy;
   } Diagnostics;
 
   /// Style of the codebase.
@@ -99,14 +106,6 @@ struct Config {
     // ::). All nested namespaces are affected as well.
     std::vector<std::string> FullyQualifiedNamespaces;
   } Style;
-
-  /// Configures what clang-tidy checks to run and options to use with them.
-  struct {
-    // A comma-seperated list of globs to specify which clang-tidy checks to
-    // run.
-    std::string Checks;
-    llvm::StringMap<std::string> CheckOptions;
-  } ClangTidy;
 };
 
 } // namespace clangd
diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp
index e82c6e159421..8682cae36f26 100644
--- a/clang-tools-extra/clangd/ConfigCompile.cpp
+++ b/clang-tools-extra/clangd/ConfigCompile.cpp
@@ -189,7 +189,6 @@ struct FragmentCompiler {
     compile(std::move(F.CompileFlags));
     compile(std::move(F.Index));
     compile(std::move(F.Diagnostics));
-    compile(std::move(F.ClangTidy));
   }
 
   void compile(Fragment::IfBlock &&F) {
@@ -379,6 +378,8 @@ struct FragmentCompiler {
         for (llvm::StringRef N : Normalized)
           C.Diagnostics.Suppress.insert(N);
       });
+
+    compile(std::move(F.ClangTidy));
   }
 
   void compile(Fragment::StyleBlock &&F) {
@@ -422,7 +423,7 @@ struct FragmentCompiler {
     CurSpec += Str;
   }
 
-  void compile(Fragment::ClangTidyBlock &&F) {
+  void compile(Fragment::DiagnosticsBlock::ClangTidyBlock &&F) {
     std::string Checks;
     for (auto &CheckGlob : F.Add)
       appendTidyCheckSpec(Checks, CheckGlob, true);
@@ -433,8 +434,9 @@ struct FragmentCompiler {
     if (!Checks.empty())
       Out.Apply.push_back(
           [Checks = std::move(Checks)](const Params &, Config &C) {
-            C.ClangTidy.Checks.append(
-                Checks, C.ClangTidy.Checks.empty() ? /*skip comma*/ 1 : 0,
+            C.Diagnostics.ClangTidy.Checks.append(
+                Checks,
+                C.Diagnostics.ClangTidy.Checks.empty() ? /*skip comma*/ 1 : 0,
                 std::string::npos);
           });
     if (!F.CheckOptions.empty()) {
@@ -445,8 +447,8 @@ struct FragmentCompiler {
       Out.Apply.push_back(
           [CheckOptions = std::move(CheckOptions)](const Params &, Config &C) {
             for (auto &StringPair : CheckOptions)
-              C.ClangTidy.CheckOptions.insert_or_assign(StringPair.first,
-                                                        StringPair.second);
+              C.Diagnostics.ClangTidy.CheckOptions.insert_or_assign(
+                  StringPair.first, StringPair.second);
           });
     }
   }
diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h
index 5b67c49fe154..c36b07f5e8e2 100644
--- a/clang-tools-extra/clangd/ConfigFragment.h
+++ b/clang-tools-extra/clangd/ConfigFragment.h
@@ -203,6 +203,29 @@ struct Fragment {
     /// (e.g. by disabling a clang-tidy check, or the -Wunused compile flag).
     /// This often has other advantages, such as skipping some analysis.
     std::vector<Located<std::string>> Suppress;
+
+    /// Controls how clang-tidy will run over the code base.
+    ///
+    /// The settings are merged with any settings found in .clang-tidy
+    /// configuration files, with these ones taking precedence.
+    struct ClangTidyBlock {
+      std::vector<Located<std::string>> Add;
+      /// List of checks to disable.
+      /// Takes precedence over Add. To enable all llvm checks except include
+      /// order:
+      ///   Add: llvm-*
+      ///   Remove: llvm-include-order
+      std::vector<Located<std::string>> Remove;
+
+      /// A Key-Value pair list of options to pass to clang-tidy checks
+      /// These take precedence over options specified in clang-tidy
+      /// configuration files. Example:
+      ///   CheckOptions:
+      ///     readability-braces-around-statements.ShortStatementLines: 2
+      std::vector<std::pair<Located<std::string>, Located<std::string>>>
+          CheckOptions;
+    };
+    ClangTidyBlock ClangTidy;
   };
   DiagnosticsBlock Diagnostics;
 
@@ -215,30 +238,6 @@ struct Fragment {
     std::vector<Located<std::string>> FullyQualifiedNamespaces;
   };
   StyleBlock Style;
-
-  /// Controls how clang-tidy will run over the code base.
-  ///
-  /// The settings are merged with any settings found in .clang-tidy
-  /// configiration files with these ones taking precedence.
-  // FIXME: move this to Diagnostics.Tidy.
-  struct ClangTidyBlock {
-    std::vector<Located<std::string>> Add;
-    /// List of checks to disable.
-    /// Takes precedence over Add. To enable all llvm checks except include
-    /// order:
-    ///   Add: llvm-*
-    ///   Remove: llvm-include-onder
-    std::vector<Located<std::string>> Remove;
-
-    /// A Key-Value pair list of options to pass to clang-tidy checks
-    /// These take precedence over options specified in clang-tidy configuration
-    /// files. Example:
-    ///   CheckOptions:
-    ///     readability-braces-around-statements.ShortStatementLines: 2
-    std::vector<std::pair<Located<std::string>, Located<std::string>>>
-        CheckOptions;
-  };
-  ClangTidyBlock ClangTidy;
 };
 
 } // namespace config
diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp
index 7aaff5565497..348ee9dd1f75 100644
--- a/clang-tools-extra/clangd/ConfigYAML.cpp
+++ b/clang-tools-extra/clangd/ConfigYAML.cpp
@@ -62,7 +62,7 @@ class Parser {
     Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); });
     Dict.handle("Index", [&](Node &N) { parse(F.Index, N); });
     Dict.handle("Style", [&](Node &N) { parse(F.Style, N); });
-    Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); });
+    Dict.handle("Diagnostics", [&](Node &N) { parse(F.Diagnostics, N); });
     Dict.parse(N);
     return !(N.failed() || HadError);
   }
@@ -110,7 +110,17 @@ class Parser {
     Dict.parse(N);
   }
 
-  void parse(Fragment::ClangTidyBlock &F, Node &N) {
+  void parse(Fragment::DiagnosticsBlock &F, Node &N) {
+    DictParser Dict("Diagnostics", this);
+    Dict.handle("Suppress", [&](Node &N) {
+      if (auto Values = scalarValues(N))
+        F.Suppress = std::move(*Values);
+    });
+    Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); });
+    Dict.parse(N);
+  }
+
+  void parse(Fragment::DiagnosticsBlock::ClangTidyBlock &F, Node &N) {
     DictParser Dict("ClangTidy", this);
     Dict.handle("Add", [&](Node &N) {
       if (auto Values = scalarValues(N))
diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp
index 0a9f12221287..c26c59fd347d 100644
--- a/clang-tools-extra/clangd/TidyProvider.cpp
+++ b/clang-tools-extra/clangd/TidyProvider.cpp
@@ -255,7 +255,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef<std::string> ExtraBadChecks) {
 
 TidyProviderRef provideClangdConfig() {
   return [](tidy::ClangTidyOptions &Opts, llvm::StringRef) {
-    const auto &CurTidyConfig = Config::current().ClangTidy;
+    const auto &CurTidyConfig = Config::current().Diagnostics.ClangTidy;
     if (!CurTidyConfig.Checks.empty())
       mergeCheckList(Opts.Checks, CurTidyConfig.Checks);
 
diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
index ef24b5d8417f..4b1da2035727 100644
--- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
@@ -259,32 +259,36 @@ TEST_F(ConfigCompileTests, DiagnosticSuppression) {
 }
 
 TEST_F(ConfigCompileTests, Tidy) {
-  Frag.ClangTidy.Add.emplace_back("bugprone-use-after-move");
-  Frag.ClangTidy.Add.emplace_back("llvm-*");
-  Frag.ClangTidy.Remove.emplace_back("llvm-include-order");
-  Frag.ClangTidy.Remove.emplace_back("readability-*");
-  Frag.ClangTidy.CheckOptions.emplace_back(
+  auto &Tidy = Frag.Diagnostics.ClangTidy;
+  Tidy.Add.emplace_back("bugprone-use-after-move");
+  Tidy.Add.emplace_back("llvm-*");
+  Tidy.Remove.emplace_back("llvm-include-order");
+  Tidy.Remove.emplace_back("readability-*");
+  Tidy.CheckOptions.emplace_back(
       std::make_pair(std::string("StrictMode"), std::string("true")));
-  Frag.ClangTidy.CheckOptions.emplace_back(std::make_pair(
+  Tidy.CheckOptions.emplace_back(std::make_pair(
       std::string("example-check.ExampleOption"), std::string("0")));
   EXPECT_TRUE(compileAndApply());
   EXPECT_EQ(
-      Conf.ClangTidy.Checks,
+      Conf.Diagnostics.ClangTidy.Checks,
       "bugprone-use-after-move,llvm-*,-llvm-include-order,-readability-*");
-  EXPECT_EQ(Conf.ClangTidy.CheckOptions.size(), 2U);
-  EXPECT_EQ(Conf.ClangTidy.CheckOptions.lookup("StrictMode"), "true");
-  EXPECT_EQ(Conf.ClangTidy.CheckOptions.lookup("example-check.ExampleOption"),
+  EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.size(), 2U);
+  EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.lookup("StrictMode"),
+            "true");
+  EXPECT_EQ(Conf.Diagnostics.ClangTidy.CheckOptions.lookup(
+                "example-check.ExampleOption"),
             "0");
   EXPECT_THAT(Diags.Diagnostics, IsEmpty());
 }
 
 TEST_F(ConfigCompileTests, TidyBadChecks) {
-  Frag.ClangTidy.Add.emplace_back("unknown-check");
-  Frag.ClangTidy.Remove.emplace_back("*");
-  Frag.ClangTidy.Remove.emplace_back("llvm-includeorder");
+  auto &Tidy = Frag.Diagnostics.ClangTidy;
+  Tidy.Add.emplace_back("unknown-check");
+  Tidy.Remove.emplace_back("*");
+  Tidy.Remove.emplace_back("llvm-includeorder");
   EXPECT_TRUE(compileAndApply());
   // Ensure bad checks are stripped from the glob.
-  EXPECT_EQ(Conf.ClangTidy.Checks, "-*");
+  EXPECT_EQ(Conf.Diagnostics.ClangTidy.Checks, "-*");
   EXPECT_THAT(
       Diags.Diagnostics,
       ElementsAre(
diff --git a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
index 25d468ba604a..e1c81344de20 100644
--- a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
@@ -60,10 +60,11 @@ CompileFlags: { Add: [foo, bar] }
 Index:
   Background: Skip
 ---
-ClangTidy: 
-  CheckOptions: 
-    IgnoreMacros: true
-    example-check.ExampleOption: 0
+Diagnostics:
+  ClangTidy:
+    CheckOptions:
+      IgnoreMacros: true
+      example-check.ExampleOption: 0
   )yaml";
   auto Results = Fragment::parseYAML(YAML, "config.yaml", Diags.callback());
   EXPECT_THAT(Diags.Diagnostics, IsEmpty());
@@ -77,7 +78,7 @@ CompileFlags: { Add: [foo, bar] }
 
   ASSERT_TRUE(Results[2].Index.Background);
   EXPECT_EQ("Skip", *Results[2].Index.Background.getValue());
-  EXPECT_THAT(Results[3].ClangTidy.CheckOptions,
+  EXPECT_THAT(Results[3].Diagnostics.ClangTidy.CheckOptions,
               ElementsAre(PairVal("IgnoreMacros", "true"),
                           PairVal("example-check.ExampleOption", "0")));
 }

From 074ad6de6fae20ff7ff720f79df1d6c1a7845157 Mon Sep 17 00:00:00 2001
From: AndreyChurbanov <andrey.churbanov@intel.com>
Date: Fri, 29 Jan 2021 13:16:41 +0300
Subject: [PATCH 017/244] [OpenMP] libomp: fix build by cl with vs2019

Replace VLA with dynamic allocation using alloca().
This fixes https://bugs.llvm.org/show_bug.cgi?id=48919.

Differential Revision: https://reviews.llvm.org/D95627

(cherry picked from commit 7f5ad0e07162e0c19e569986ee37a17c147c9a27)
---
 openmp/runtime/src/kmp_settings.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index a8522130f972..b477edbbfb42 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3355,7 +3355,8 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
         ntraits++;
     }
   }
-  omp_alloctrait_t traits[ntraits];
+  omp_alloctrait_t *traits =
+      (omp_alloctrait_t *)KMP_ALLOCA(ntraits * sizeof(omp_alloctrait_t));
 
 // Helper macros
 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

From 99f43f598907a9cc1a613c691ffbce7c8bd4ec75 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@chromium.org>
Date: Thu, 28 Jan 2021 14:37:33 +0100
Subject: [PATCH 018/244] Relax test expectations in
 debug-info-gline-tables-only-codeview.cpp

To make it pass also on 32-bit Windows, see PR48920.

(cherry picked from commit 0024efc69ea6cd0b630cd11cef5991b7edb73ffc)
---
 clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp b/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp
index 27ac682c10f5..409b62da62c1 100644
--- a/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp
+++ b/clang/test/CodeGenCXX/debug-info-gline-tables-only-codeview.cpp
@@ -25,6 +25,6 @@ void test() {
   // CHECK: ![[C]] = !DICompositeType(tag: DW_TAG_structure_type, name: "C",
   // CHECK-SAME:                      flags: DIFlagFwdDecl
   // CHECK-NOT: identifier
-  // CHECK: ![[MTYPE]] = !DISubroutineType(types: !{{.*}})
+  // CHECK: ![[MTYPE]] = !DISubroutineType({{.*}}types: !{{.*}})
   c.m();
 }

From c5a1eb9b0a76eef7e3025b7333a0d256b8562360 Mon Sep 17 00:00:00 2001
From: Piotr Sobczak <Piotr.Sobczak@amd.com>
Date: Wed, 27 Jan 2021 16:02:49 +0100
Subject: [PATCH 019/244] [AMDGPU] Avoid an illegal operand in
 si-shrink-instructions

Before the patch it was possible to trigger a constant bus
violation when folding immediates into a shrunk instruction.

The patch adds a check to enforce the legality of the new operand.

Differential Revision: https://reviews.llvm.org/D95527

(cherry picked from commit fc8e7411218c846386650cfba111b62827c71da0)
---
 .../Target/AMDGPU/SIShrinkInstructions.cpp    | 24 ++++++++++---------
 .../shrink-instructions-illegal-fold.mir      | 23 ++++++++++++++++++
 2 files changed, 36 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir

diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 2628070f219c..cdb78aae1c4f 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -75,17 +75,19 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
         MachineOperand &MovSrc = Def->getOperand(1);
         bool ConstantFolded = false;
 
-        if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
-                               isUInt<32>(MovSrc.getImm()))) {
-          Src0.ChangeToImmediate(MovSrc.getImm());
-          ConstantFolded = true;
-        } else if (MovSrc.isFI()) {
-          Src0.ChangeToFrameIndex(MovSrc.getIndex());
-          ConstantFolded = true;
-        } else if (MovSrc.isGlobal()) {
-          Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
-                          MovSrc.getTargetFlags());
-          ConstantFolded = true;
+        if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
+          if (MovSrc.isImm() &&
+              (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
+            Src0.ChangeToImmediate(MovSrc.getImm());
+            ConstantFolded = true;
+          } else if (MovSrc.isFI()) {
+            Src0.ChangeToFrameIndex(MovSrc.getIndex());
+            ConstantFolded = true;
+          } else if (MovSrc.isGlobal()) {
+            Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+                            MovSrc.getTargetFlags());
+            ConstantFolded = true;
+          }
         }
 
         if (ConstantFolded) {
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir b/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir
new file mode 100644
index 000000000000..7889f437facf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/shrink-instructions-illegal-fold.mir
@@ -0,0 +1,23 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-shrink-instructions --verify-machineinstrs %s -o - | FileCheck %s
+
+# Make sure immediate folding into V_CNDMASK respects constant bus restrictions.
+---
+
+name:            shrink_cndmask_illegal_imm_folding
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: shrink_cndmask_illegal_imm_folding
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[MOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32768, implicit $exec
+    ; CHECK: V_CMP_EQ_U32_e32 0, [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK: V_CNDMASK_B32_e32 [[MOV]], killed [[COPY]], implicit $vcc, implicit $exec
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 32768, implicit $exec
+    V_CMP_EQ_U32_e32 0, %0:vgpr_32, implicit-def $vcc, implicit $exec
+    %2:vgpr_32 = V_CNDMASK_B32_e64 0, %1:vgpr_32, 0, killed %0:vgpr_32, $vcc, implicit $exec
+    S_NOP 0
+
+...

From b2710e7535bd43d9fd6f9792644fe2c207079c42 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Thu, 28 Jan 2021 23:53:45 +0100
Subject: [PATCH 020/244] [sanitizer] Fix msan test build on FreeBSD after
 7afdc89c2054

This commit accidentally enabled fgetgrent_r() in the msan tests under
FreeBSD, but this function is not supported. Also remove FreeBSD from
the SANITIZER_INTERCEPT_FGETGRENT_R macro.

(cherry picked from commit e056fc6cb676f72d5b7dfe7ca540b3275bd1a46f)
---
 compiler-rt/lib/msan/tests/msan_test.cpp                        | 2 ++
 .../lib/sanitizer_common/sanitizer_platform_interceptors.h      | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp
index 7378b237a711..5dc9090f36c0 100644
--- a/compiler-rt/lib/msan/tests/msan_test.cpp
+++ b/compiler-rt/lib/msan/tests/msan_test.cpp
@@ -3707,7 +3707,9 @@ TEST(MemorySanitizer, getgrent_r) {
   EXPECT_NOT_POISONED(grp.gr_gid);
   EXPECT_NOT_POISONED(grpres);
 }
+#endif
 
+#ifdef __GLIBC__
 TEST(MemorySanitizer, fgetgrent_r) {
   FILE *fp = fopen("/etc/group", "r");
   struct group grp;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 7f7b38d4215b..068fc9829e57 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -226,7 +226,7 @@
   (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_GETPWENT \
   (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
-#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_FREEBSD || SI_GLIBC || SI_SOLARIS)
+#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_GLIBC || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID || SI_SOLARIS
 #define SANITIZER_INTERCEPT_GETPWENT_R \
   (SI_FREEBSD || SI_NETBSD || SI_GLIBC || SI_SOLARIS)

From 4e20d9c03d9acc9ee5a78cbba82b08d51ecbaf3f Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Thu, 28 Jan 2021 19:01:41 -0800
Subject: [PATCH 021/244] Make the profile-filter.c test compatible with 32-bit
 systems

This addresses PR48930.

Differential Revision: https://reviews.llvm.org/D95658

(cherry picked from commit 0217f1c7a31ba44715bc083a60cddc2192ffed96)
---
 clang/test/CodeGen/profile-filter.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang/test/CodeGen/profile-filter.c b/clang/test/CodeGen/profile-filter.c
index 5415ff96cb14..dc5a31e872a1 100644
--- a/clang/test/CodeGen/profile-filter.c
+++ b/clang/test/CodeGen/profile-filter.c
@@ -28,11 +28,11 @@ unsigned i;
 // EXCLUDE: noprofile
 // EXCLUDE: @test1
 unsigned test1() {
-  // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8
-  // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8
-  // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8
-  // SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8
-  // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0), align 8
+  // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0)
+  // FUNC: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0)
+  // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0)
+  // SECTION-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0)
+  // EXCLUDE-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test1, i64 0, i64 0)
   return i + 1;
 }
 
@@ -47,10 +47,10 @@ unsigned test1() {
 // EXCLUDE-NOT: noprofile
 // EXCLUDE: @test2
 unsigned test2() {
-  // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8
-  // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8
-  // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8
-  // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8
-  // EXCLUDE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0), align 8
+  // CHECK: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0)
+  // FUNC-NOT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0)
+  // FILE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0)
+  // SECTION: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0)
+  // EXCLUDE: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_test2, i64 0, i64 0)
   return i - 1;
 }

From 07f8d437134c0b229104241a621db05013da0049 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@moritz.systems>
Date: Fri, 29 Jan 2021 02:14:47 +0100
Subject: [PATCH 022/244] [clang-tidy] Fix linking tests to LLVMTestingSupport

LLVMTestingSupport is not part of libLLVM, and therefore cannot
be linked to via LLVM_LINK_COMPONENTS.  Instead, it needs to be
listed explicitly in target_link_libraries so that it is linked
even if LLVM_LINK_LLVM_DYLIB is used.  This is consistent with handling
in clangd.

Fixes PR#48931

Differential Revision: https://reviews.llvm.org/D95653

(cherry picked from commit 632545e8ce846ccaeca8df15a3dc5e36d01a1275)
---
 clang-tools-extra/unittests/clang-tidy/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
index be35b71d15cf..05d330dd8033 100644
--- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(LLVM_LINK_COMPONENTS
   FrontendOpenMP
   Support
-  TestingSupport
   )
 
 get_filename_component(CLANG_LINT_SOURCE_DIR
@@ -46,4 +45,5 @@ target_link_libraries(ClangTidyTests
   clangTidyObjCModule
   clangTidyReadabilityModule
   clangTidyUtils
+  LLVMTestingSupport
   )

From f54cf61ad8e1cc6592074ddd7ad07908623ead6b Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Wed, 27 Jan 2021 17:06:05 -0500
Subject: [PATCH 023/244] [OpenMP][NVPTX] Disable building NVPTX deviceRTL by
 default on a non-CUDA system

D95466 dropped the CUDA requirement for building the NVPTX deviceRTL and
enabled the build by default. However, the build requires some libraries that
are not available on non-CUDA systems by default, which could break the
compilation. This patch disables the build by default on such systems. It can
be enabled with `LIBOMPTARGET_BUILD_NVPTX_BCLIB=ON`.

Reviewed By: kparzysz

Differential Revision: https://reviews.llvm.org/D95556

(cherry picked from commit fb12df4a8e33d759938057718273dfb434b2d9c4)
---
 openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 4661bf08af1c..23efbba29d66 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -10,6 +10,15 @@
 #
 ##===----------------------------------------------------------------------===##
 
+# By default we will not build NVPTX deviceRTL on a non-CUDA system.
+set(LIBOMPTARGET_BUILD_NVPTX_BCLIB FALSE CACHE BOOL
+  "Whether build NVPTX deviceRTL on non-CUDA system.")
+
+if (NOT (LIBOMPTARGET_DEP_CUDA_FOUND OR LIBOMPTARGET_BUILD_NVPTX_BCLIB))
+  libomptarget_say("Not building NVPTX deviceRTL by default on non-CUDA system.")
+  return()
+endif()
+
 # Check if we can create an LLVM bitcode implementation of the runtime library
 # that could be inlined in the user application. For that we need to find
 # a Clang compiler capable of compiling our CUDA files to LLVM bitcode and

From 07dc51637cc419cbd61383eb4e26713a8f931806 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sat, 30 Jan 2021 13:30:48 +0000
Subject: [PATCH 024/244] [LoopUnswitch] Properly update MSSA if header has
 non-clobbering stores.

This patch fixes updating MemorySSA if the header contains memory
defs that do not clobber a duplicated instruction. We need to find the
first defining access outside the loop body and use that as defining
access of the duplicated instruction.

This fixes a crash caused by bee486851c1a.

(Cherry-picked on the 12.x release branch from
10c57268c074c3ad48f76da38fa2ba575ee3d1f9)
---
 llvm/lib/Transforms/Scalar/LoopUnswitch.cpp   | 10 ++-
 .../partial-unswitch-update-memoryssa.ll      | 76 +++++++++++++++++++
 .../LoopUnswitch/partial-unswitch.ll          | 36 ---------
 3 files changed, 83 insertions(+), 39 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 18717394d384..822a786fc7c7 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1114,12 +1114,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(
 
         Loop *L = LI->getLoopFor(I->getParent());
         auto *DefiningAccess = MemA->getDefiningAccess();
-        // If the defining access is a MemoryPhi in the header, get the incoming
-        // value for the pre-header as defining access.
-        if (DefiningAccess->getBlock() == I->getParent()) {
+        // Get the first defining access before the loop.
+        while (L->contains(DefiningAccess->getBlock())) {
+          // If the defining access is a MemoryPhi, get the incoming
+          // value for the pre-header as defining access.
           if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
             DefiningAccess =
                 MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+          } else {
+            DefiningAccess =
+                cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
           }
         }
         MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll
new file mode 100644
index 000000000000..ec1e8eeeb070
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-update-memoryssa.ll
@@ -0,0 +1,76 @@
+; RUN: opt -loop-unswitch -verify-dom-info -verify-memoryssa -S -enable-new-pm=0 %s | FileCheck %s
+; RUN: opt -loop-unswitch -memssa-check-limit=3 -verify-dom-info -verify-memoryssa -S -enable-new-pm=0 %s | FileCheck %s
+
+declare void @clobber()
+
+; Check that MemorySSA updating can deal with a clobbering access of a
+; duplicated load being a MemoryPHI outside the loop.
+define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) {
+; CHECK-LABEL: @partial_unswitch_memssa_update(
+; CHECK-LABEL: loop.ph:
+; CHECK-NEXT:    [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0
+; CHECK-NEXT:    br i1 [[C]]
+entry:
+  br i1 %c, label %loop.ph, label %outside.clobber
+
+outside.clobber:
+  call void @clobber()
+  br label %loop.ph
+
+loop.ph:
+  br label %loop.header
+
+loop.header:
+  %lv = load i32, i32* %ptr, align 4
+  %hc = icmp eq i32 %lv, 0
+  br i1 %hc, label %if, label %then
+
+if:
+  br label %loop.latch
+
+then:
+  br label %loop.latch
+
+loop.latch:
+  br i1 true, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+; Check that MemorySSA updating can deal with skipping defining accesses in the
+; loop body until it finds the first defining access outside the loop.
+define void @partial_unswitch_inloop_stores_beteween_outside_defining_access(i64* noalias %ptr, i16* noalias %src) {
+; CHECK-LABEL: @partial_unswitch_inloop_stores_beteween_outside_defining_access
+; CHECK-LABEL: entry:
+; CHECK-NEXT:    store i64 0, i64* %ptr, align 1
+; CHECK-NEXT:    store i64 1, i64* %ptr, align 1
+; CHECK-NEXT:    [[LV:%[a-z0-9]+]] = load i16, i16* %src, align 1
+; CHECK-NEXT:    [[C:%[a-z0-9]+]] = icmp eq i16 [[LV]], 0
+; CHECK-NEXT:    br i1 [[C]]
+;
+entry:
+  store i64 0, i64* %ptr, align 1
+  store i64 1, i64* %ptr, align 1
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  store i64 2, i64* %ptr, align 1
+  %lv = load i16, i16* %src, align 1
+  %invar.cond = icmp eq i16 %lv, 0
+  br i1 %invar.cond, label %noclobber, label %loop.latch
+
+noclobber:
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv, 1000
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
diff --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll
index 9f0e5d6f6c35..96a6b0f4e2b5 100644
--- a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll
@@ -575,42 +575,6 @@ exit:
   ret i32 10
 }
 
-; Check that MemorySSA updating can deal with a clobbering access of a
-; duplicated load being a MemoryPHI outside the loop.
-define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) {
-; CHECK-LABEL: @partial_unswitch_memssa_update(
-; CHECK-LABEL: loop.ph:
-; CHECK-NEXT:    [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4
-; CHECK-NEXT:    [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0
-; CHECK-NEXT:    br i1 [[C]]
-entry:
-  br i1 %c, label %loop.ph, label %outside.clobber
-
-outside.clobber:
-  call void @clobber()
-  br label %loop.ph
-
-loop.ph:
-  br label %loop.header
-
-loop.header:
-  %lv = load i32, i32* %ptr, align 4
-  %hc = icmp eq i32 %lv, 0
-  br i1 %hc, label %if, label %then
-
-if:
-  br label %loop.latch
-
-then:
-  br label %loop.latch
-
-loop.latch:
-  br i1 true, label %loop.header, label %exit
-
-exit:
-  ret void
-}
-
 ; Make sure the duplicated instructions are moved to a preheader that always
 ; executes when the loop body also executes. Do not check the unswitched code,
 ; because it is already checked in the @partial_unswitch_true_successor test

From c5fd87eaddaad87b28530e5272b7cf0c788dc1f9 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 27 Jan 2021 03:09:20 +0000
Subject: [PATCH 025/244] workflows: Fix LLVM ABI checks to work for X.0.0
 releases

---
 .github/workflows/llvm-tests.yml | 84 +++++++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
index 675383407d64..1cffc3ef4d97 100644
--- a/.github/workflows/llvm-tests.yml
+++ b/.github/workflows/llvm-tests.yml
@@ -1,8 +1,5 @@
 name: LLVM Tests
 
-env:
-  release_major: 12
-
 on:
   push:
     branches:
@@ -42,7 +39,38 @@ jobs:
       with:
         cmake_args: -G Ninja -DCMAKE_BUILD_TYPE=Release
 
+  abi-dump-setup:
+    runs-on: ubuntu-latest
+    outputs:
+      BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }}
+      ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }}
+      BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }}
+      LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
+      LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }}
+      LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }}
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v1
+        with:
+          fetch-depth: 1
+
+      - name: Get LLVM version
+        id: version
+        uses: tstellar/actions/get-llvm-version@get-version
+
+      - name: Setup Variables
+        id: vars
+        run: |
+          if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then
+            echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))
+            echo ::set-output name=ABI_HEADERS::llvm-c
+          else
+            echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
+            echo ::set-output name=ABI_HEADERS::.
+          fi
+
   abi-dump:
+    needs: abi-dump-setup
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -51,11 +79,11 @@ jobs:
           - build-latest
         include:
           - name: build-baseline
-            # FIXME: Referencing the env context does not work here
-            # ref: llvmorg-${{ env.release_major }}.0.0
-            ref: llvmorg-12.0.0
+            llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}
+            ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0
             repo: llvm/llvm-project
           - name: build-latest
+            llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }}
             ref: ${{ github.sha }}
             repo: ${{ github.repository }}
     steps:
@@ -78,22 +106,44 @@ jobs:
         repo: ${{ matrix.repo }}
     - name: Configure
       run: |
-        mkdir build
-        cd build
-        cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" ../llvm
+        mkdir install
+        cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm
     - name: Build
-      run: ninja -C build libLLVM-${{ env.release_major }}.so
+      # Need to run install-LLVM twice to ensure the symlink is installed (this is a bug).
+      run: |
+        ninja -C build install-LLVM
+        ninja -C build install-LLVM
+        ninja -C build install-llvm-headers
     - name: Dump ABI
-      run: abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers llvm/include -o ${{ matrix.ref }}.abi.tar.gz build/lib/libLLVM-${{ env.release_major }}.so
+      run: |
+        if [ "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" ]; then
+          nm ./install/lib/libLLVM.so | awk "/T _LLVM/ || /T LLVM/ { print $3 }" | sort -u | sed -e "s/^_//g" | cut -d ' ' -f 3 > llvm.symbols
+          # Even though the -symbols-list option doesn't seem to filter out the symbols, I believe it speeds up processing, so I'm leaving it in.
+          export EXTRA_ARGS="-symbols-list llvm.symbols"
+        else
+          touch llvm.symbols
+        fi
+        abi-dumper $EXTRA_ARGS -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o ${{ matrix.ref }}.abi ./install/lib/libLLVM.so
+        # Remove symbol versioning from dumps, so we can compare across major versions.
+        sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi
+        tar -czf ${{ matrix.ref }}.abi.tar.gz ${{ matrix.ref }}.abi
     - name: Upload ABI file
       uses: actions/upload-artifact@v1
       with:
         name: ${{ matrix.name }}
         path: ${{ matrix.ref }}.abi.tar.gz
 
+    - name: Upload symbol list file
+      if: matrix.name == 'build-baseline'
+      uses: actions/upload-artifact@v1
+      with:
+        name: symbol-list
+        path: llvm.symbols
+
   abi-compare:
     runs-on: ubuntu-latest
     needs:
+      - abi-dump-setup
       - abi-dump
     steps:
       - name: Download baseline
@@ -104,10 +154,20 @@ jobs:
         uses: actions/download-artifact@v1
         with:
           name: build-latest
+      - name: Download symbol list
+        uses: actions/download-artifact@v1
+        with:
+          name: symbol-list
+
       - name: Install abi-compliance-checker
         run: sudo apt-get install abi-compliance-checker
       - name: Compare ABI
-        run: abi-compliance-checker -l libLLVM-${{ env.release_major}}.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz
+        run: |
+          if [ -s symbol-list/llvm.symbols ]; then
+            # This option doesn't seem to work with the ABI dumper, so passing it here.
+            export EXTRA_ARGS="-symbols-list symbol-list/llvm.symbols"
+          fi
+          abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.tar.gz -new build-latest/*.tar.gz || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c"
       - name: Upload ABI Comparison
         if: always()
         uses: actions/upload-artifact@v1

From b6d2402e319be00592908b2c9cb63fccdb481008 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 2 Feb 2021 15:08:17 +0200
Subject: [PATCH 026/244] [docs] Add release notes for things I've done for the
 12.x release branch.

---
 clang/docs/ReleaseNotes.rst |  3 +++
 lld/docs/ReleaseNotes.rst   | 20 ++++++++++++++++++--
 llvm/docs/ReleaseNotes.rst  | 19 +++++++++++++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3001d6feb631..a34cd512ca59 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -150,6 +150,9 @@ Attribute Changes in Clang
 Windows Support
 ---------------
 
+- Implicitly add ``.exe`` suffix for MinGW targets, even when cross compiling.
+  (This matches a change from GCC 8.)
+
 C Language Changes in Clang
 ---------------------------
 
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index e0b17ca3e030..ea1403888eba 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -35,12 +35,28 @@ Breaking changes
 COFF Improvements
 -----------------
 
-* ...
+* Error out clearly if creating a DLL with too many exported symbols.
+  (`D86701 <https://reviews.llvm.org/D86701>`_)
 
 MinGW Improvements
 ------------------
 
-* ...
+* Enabled dynamicbase by default. (`D86654 <https://reviews.llvm.org/D86654>`_)
+
+* Tolerate mismatches between COMDAT section sizes with different amounts of
+  padding (produced by binutils) by inspecting the aux section definition.
+  (`D86659 <https://reviews.llvm.org/D86659>`_)
+
+* Support setting the subsystem version via the subsystem argument.
+  (`D88804 <https://reviews.llvm.org/D88804>`_)
+
+* Implemented the GNU -wrap option.
+  (`D89004 <https://reviews.llvm.org/D89004>`_,
+  `D91689 <https://reviews.llvm.org/D91689>`_)
+
+* Handle the ``--demangle`` and ``--no-demangle`` options.
+  (`D93950 <https://reviews.llvm.org/D93950>`_)
+
 
 MachO Improvements
 ------------------
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index de8431fe3908..f2eb53778406 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -92,6 +92,25 @@ Changes to TableGen
   uses the "`...`" range punctuation (e.g., ``{0...9}``). The hyphen syntax
   is deprecated.
 
+Changes to the AArch64 Backend
+------------------------------
+
+During this release ...
+
+* Lots of improvements to generation of Windows unwind data; the unwind
+  data is optimized and written in packed form where possible, reducing
+  the size of unwind data (pdata and xdata sections) by around 60%
+  compared with LLVM 11. The generation of prologs/epilogs is tweaked
+  when targeting Windows, to increase the chances of being able to use
+  the packed unwind info format.
+
+* Support for creating Windows unwind data using ``.seh_*`` assembler
+  directives.
+
+* Produce proper assembly output for the Windows target, including
+  ``:lo12:`` relocation specifiers, to allow the assembly output
+  to actually be assembled.
+
 Changes to the ARM Backend
 --------------------------
 

From 0db882a0f59afcd7f76d716ca2e04f2d6d92aa03 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 1 Feb 2021 10:48:29 -0800
Subject: [PATCH 027/244] workflows: Fix libclc tests

---
 .github/workflows/libclc-tests.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml
index 2f1eb2939ea2..188eecfc3b89 100644
--- a/.github/workflows/libclc-tests.yml
+++ b/.github/workflows/libclc-tests.yml
@@ -45,9 +45,9 @@ jobs:
         cmake_args: -G Ninja  -DLLVM_ENABLE_PROJECTS="clang" -DCMAKE_BUILD_TYPE=Release
         build_target: ""
     - name: Build and test libclc
+      # spirv targets require llvm-spirv, so skip building them until we figure out
+      # how to install this tool.
       run: |
-        mkdir libclc-build
-        cd libclc-build
-        cmake -G Ninja ../libclc -DLLVM_CONFIG=../build/bin/llvm-config
-        ninja
-        ninja test
+        cmake -G Ninja -S libclc -B libclc-build -DLLVM_CONFIG=`pwd`/build/bin/llvm-config -DLIBCLC_TARGETS_TO_BUILD="amdgcn--;amdgcn--amdhsa;r600--;nvptx--;nvptx64--;nvptx--nvidiacl;nvptx64--nvidiacl"
+        ninja -C libclc-build
+        ninja -C libclc-build test

From c0097c784179e6f927ed8ae6b28796faee2fea61 Mon Sep 17 00:00:00 2001
From: Atmn Patel <atmndp@gmail.com>
Date: Sun, 31 Jan 2021 19:18:41 -0500
Subject: [PATCH 028/244] [OpenMP][Libomptarget] Remove possibly harmful copy
 constructor call for RTLsTy

From https://bugs.llvm.org/show_bug.cgi?id=48973, we know that
`std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs)` causes
compile-time problems in libstdc++v3 5.3.1. This is because there was a defect in
the standard regarding `call_once` (LWG 2442). This was fixed in libstdc++ soon
thereafter, but there are likely other standard libraries where this will fail.

By matching this function call with the other one, we fix this bug.

Differential Revision: https://reviews.llvm.org/D95769
---
 openmp/libomptarget/src/interface.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 239570935cb2..cf6d36960c75 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -94,7 +94,7 @@ EXTERN void __tgt_register_requires(int64_t flags) {
 /// adds a target shared library to the target execution image
 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
   TIMESCOPE();
-  std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, PM->RTLs);
+  std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
   for (auto &RTL : PM->RTLs.AllRTLs) {
     if (RTL.register_lib) {
       if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) {

From 162642bec0df760b27e66cfff046b40f1dfd2713 Mon Sep 17 00:00:00 2001
From: Juneyoung Lee <aqjune@gmail.com>
Date: Thu, 4 Feb 2021 00:07:04 +0900
Subject: [PATCH 029/244] Revert "[ConstantFold] Fold more operations to
 poison"

This reverts commit 53040a968dc2ff20931661e55f05da2ef8b964a0 due to its
bad interaction with select i1 -> and/or i1 transformation.

This fixes:
https://bugs.llvm.org/show_bug.cgi?id=49005
https://bugs.llvm.org/show_bug.cgi?id=48435

(cherry picked from commit 06829034ca64b8c83a5b20d8abe5ddbfe7af0004)
---
 clang/test/Frontend/fixed_point_unary.c       |  4 +-
 llvm/lib/IR/ConstantFold.cpp                  | 59 ++++++-------
 ...amdgpu-codegenprepare-fold-binop-select.ll |  2 +-
 .../Transforms/InstCombine/apint-shift.ll     |  2 +-
 .../canonicalize-ashr-shl-to-masking.ll       |  2 +-
 .../canonicalize-lshr-shl-to-masking.ll       |  2 +-
 .../canonicalize-shl-lshr-to-masking.ll       |  2 +-
 llvm/test/Transforms/InstCombine/icmp.ll      |  4 +-
 ...nput-masking-after-truncation-variant-a.ll |  4 +-
 ...nput-masking-after-truncation-variant-b.ll |  4 +-
 ...nput-masking-after-truncation-variant-c.ll |  4 +-
 ...nput-masking-after-truncation-variant-d.ll |  4 +-
 ...nput-masking-after-truncation-variant-e.ll |  4 +-
 ...dant-left-shift-input-masking-variant-a.ll |  4 +-
 ...dant-left-shift-input-masking-variant-b.ll |  4 +-
 ...dant-left-shift-input-masking-variant-c.ll |  4 +-
 ...dant-left-shift-input-masking-variant-d.ll |  4 +-
 ...dant-left-shift-input-masking-variant-e.ll |  4 +-
 .../InstCombine/select-of-bittest.ll          |  6 +-
 .../InstCombine/shift-add-inseltpoison.ll     | 12 +--
 llvm/test/Transforms/InstCombine/shift-add.ll | 12 +--
 .../ConstProp/InsertElement-inseltpoison.ll   |  2 +-
 .../InstSimplify/ConstProp/InsertElement.ll   |  2 +-
 .../Transforms/InstSimplify/ConstProp/cast.ll |  4 +-
 .../InstSimplify/ConstProp/poison.ll          |  4 +-
 .../InstSimplify/ConstProp/shift.ll           | 24 ++---
 .../vector-undef-elts-inseltpoison.ll         |  2 +-
 .../ConstProp/vector-undef-elts.ll            |  2 +-
 .../ConstProp/vscale-inseltpoison.ll          | 16 ++--
 .../InstSimplify/ConstProp/vscale.ll          | 16 ++--
 llvm/test/Transforms/InstSimplify/div.ll      | 39 +--------
 llvm/test/Transforms/InstSimplify/rem.ll      | 31 +------
 llvm/test/Transforms/InstSimplify/undef.ll    | 87 +++++++++----------
 llvm/test/Transforms/SROA/phi-gep.ll          |  2 +-
 llvm/test/Transforms/SROA/select-gep.ll       |  2 +-
 .../X86/insert-binop-inseltpoison.ll          |  4 +-
 .../X86/insert-binop-with-constant.ll         | 42 ++++-----
 .../VectorCombine/X86/insert-binop.ll         |  6 +-
 llvm/unittests/IR/ConstantsTest.cpp           | 25 +++---
 39 files changed, 199 insertions(+), 258 deletions(-)

diff --git a/clang/test/Frontend/fixed_point_unary.c b/clang/test/Frontend/fixed_point_unary.c
index 6ce760daba11..849e38a94bc4 100644
--- a/clang/test/Frontend/fixed_point_unary.c
+++ b/clang/test/Frontend/fixed_point_unary.c
@@ -90,7 +90,7 @@ void inc_usa() {
 // SIGNED-LABEL: @inc_uf(
 // SIGNED-NEXT:  entry:
 // SIGNED-NEXT:    [[TMP0:%.*]] = load i16, i16* @uf, align 2
-// SIGNED-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], poison
+// SIGNED-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], undef
 // SIGNED-NEXT:    store i16 [[TMP1]], i16* @uf, align 2
 // SIGNED-NEXT:    ret void
 //
@@ -271,7 +271,7 @@ void dec_usa() {
 // SIGNED-LABEL: @dec_uf(
 // SIGNED-NEXT:  entry:
 // SIGNED-NEXT:    [[TMP0:%.*]] = load i16, i16* @uf, align 2
-// SIGNED-NEXT:    [[TMP1:%.*]] = sub i16 [[TMP0]], poison
+// SIGNED-NEXT:    [[TMP1:%.*]] = sub i16 [[TMP0]], undef
 // SIGNED-NEXT:    store i16 [[TMP1]], i16* @uf, align 2
 // SIGNED-NEXT:    ret void
 //
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 03cb108cc485..95dd55237e5f 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -630,7 +630,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
           V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) {
         // Undefined behavior invoked - the destination type can't represent
         // the input constant.
-        return PoisonValue::get(DestTy);
+        return UndefValue::get(DestTy);
       }
       return ConstantInt::get(FPC->getContext(), IntVal);
     }
@@ -916,7 +916,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
 
   unsigned NumElts = ValTy->getNumElements();
   if (CIdx->uge(NumElts))
-    return PoisonValue::get(Val->getType());
+    return UndefValue::get(Val->getType());
 
   SmallVector<Constant*, 16> Result;
   Result.reserve(NumElts);
@@ -1151,21 +1151,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
     }
     case Instruction::SDiv:
     case Instruction::UDiv:
-      // X / undef -> poison
-      // X / 0 -> poison
-      if (match(C2, m_CombineOr(m_Undef(), m_Zero())))
-        return PoisonValue::get(C2->getType());
+      // X / undef -> undef
+      if (isa<UndefValue>(C2))
+        return C2;
+      // undef / 0 -> undef
       // undef / 1 -> undef
-      if (match(C2, m_One()))
+      if (match(C2, m_Zero()) || match(C2, m_One()))
         return C1;
       // undef / X -> 0       otherwise
       return Constant::getNullValue(C1->getType());
     case Instruction::URem:
     case Instruction::SRem:
-      // X % undef -> poison
-      // X % 0 -> poison
-      if (match(C2, m_CombineOr(m_Undef(), m_Zero())))
-        return PoisonValue::get(C2->getType());
+      // X % undef -> undef
+      if (match(C2, m_Undef()))
+        return C2;
+      // undef % 0 -> undef
+      if (match(C2, m_Zero()))
+        return C1;
       // undef % X -> 0       otherwise
       return Constant::getNullValue(C1->getType());
     case Instruction::Or:                          // X | undef -> -1
@@ -1173,28 +1175,28 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
         return C1;
       return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
     case Instruction::LShr:
-      // X >>l undef -> poison
+      // X >>l undef -> undef
       if (isa<UndefValue>(C2))
-        return PoisonValue::get(C2->getType());
+        return C2;
       // undef >>l 0 -> undef
       if (match(C2, m_Zero()))
         return C1;
       // undef >>l X -> 0
       return Constant::getNullValue(C1->getType());
     case Instruction::AShr:
-      // X >>a undef -> poison
+      // X >>a undef -> undef
       if (isa<UndefValue>(C2))
-        return PoisonValue::get(C2->getType());
+        return C2;
       // undef >>a 0 -> undef
       if (match(C2, m_Zero()))
         return C1;
-      // TODO: undef >>a X -> poison if the shift is exact
+      // TODO: undef >>a X -> undef if the shift is exact
       // undef >>a X -> 0
       return Constant::getNullValue(C1->getType());
     case Instruction::Shl:
       // X << undef -> undef
       if (isa<UndefValue>(C2))
-        return PoisonValue::get(C2->getType());
+        return C2;
       // undef << 0 -> undef
       if (match(C2, m_Zero()))
         return C1;
@@ -1247,14 +1249,14 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       if (CI2->isOne())
         return C1;                                            // X / 1 == X
       if (CI2->isZero())
-        return PoisonValue::get(CI2->getType());              // X / 0 == poison
+        return UndefValue::get(CI2->getType());               // X / 0 == undef
       break;
     case Instruction::URem:
     case Instruction::SRem:
       if (CI2->isOne())
         return Constant::getNullValue(CI2->getType());        // X % 1 == 0
       if (CI2->isZero())
-        return PoisonValue::get(CI2->getType());              // X % 0 == poison
+        return UndefValue::get(CI2->getType());               // X % 0 == undef
       break;
     case Instruction::And:
       if (CI2->isZero()) return C2;                           // X & 0 == 0
@@ -1368,7 +1370,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       case Instruction::SDiv:
         assert(!CI2->isZero() && "Div by zero handled above");
         if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
-          return PoisonValue::get(CI1->getType());   // MIN_INT / -1 -> poison
+          return UndefValue::get(CI1->getType());   // MIN_INT / -1 -> undef
         return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
       case Instruction::URem:
         assert(!CI2->isZero() && "Div by zero handled above");
@@ -1376,7 +1378,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       case Instruction::SRem:
         assert(!CI2->isZero() && "Div by zero handled above");
         if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
-          return PoisonValue::get(CI1->getType());   // MIN_INT % -1 -> poison
+          return UndefValue::get(CI1->getType());   // MIN_INT % -1 -> undef
         return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
       case Instruction::And:
         return ConstantInt::get(CI1->getContext(), C1V & C2V);
@@ -1387,15 +1389,15 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
       case Instruction::Shl:
         if (C2V.ult(C1V.getBitWidth()))
           return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
-        return PoisonValue::get(C1->getType()); // too big shift is poison
+        return UndefValue::get(C1->getType()); // too big shift is undef
       case Instruction::LShr:
         if (C2V.ult(C1V.getBitWidth()))
           return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
-        return PoisonValue::get(C1->getType()); // too big shift is poison
+        return UndefValue::get(C1->getType()); // too big shift is undef
       case Instruction::AShr:
         if (C2V.ult(C1V.getBitWidth()))
           return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
-        return PoisonValue::get(C1->getType()); // too big shift is poison
+        return UndefValue::get(C1->getType()); // too big shift is undef
       }
     }
 
@@ -1441,7 +1443,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
     // Fast path for splatted constants.
     if (Constant *C2Splat = C2->getSplatValue()) {
       if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue())
-        return PoisonValue::get(VTy);
+        return UndefValue::get(VTy);
       if (Constant *C1Splat = C1->getSplatValue()) {
         return ConstantVector::getSplat(
             VTy->getElementCount(),
@@ -1458,9 +1460,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
         Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
         Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
 
-        // If any element of a divisor vector is zero, the whole op is poison.
+        // If any element of a divisor vector is zero, the whole op is undef.
         if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
-          return PoisonValue::get(VTy);
+          return UndefValue::get(VTy);
 
         Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
       }
@@ -2343,8 +2345,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
     return PoisonValue::get(GEPTy);
 
   if (isa<UndefValue>(C))
-    // If inbounds, we can choose an out-of-bounds pointer as a base pointer.
-    return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy);
+    return UndefValue::get(GEPTy);
 
   Constant *Idx0 = cast<Constant>(Idxs[0]);
   if (Idxs.size() == 1 && (Idx0->isNullValue() || isa<UndefValue>(Idx0)))
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index e0037f0d8e45..bfe83c7a1285 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -42,7 +42,7 @@ define i32 @select_sdiv_rhs_const_i32(i1 %cond) {
 
 define <2 x i32> @select_sdiv_lhs_const_v2i32(i1 %cond) {
 ; IR-LABEL: @select_sdiv_lhs_const_v2i32(
-; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> <i32 666, i32 poison>, <2 x i32> <i32 555, i32 1428>
+; IR-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x i32> <i32 666, i32 undef>, <2 x i32> <i32 555, i32 1428>
 ; IR-NEXT:    ret <2 x i32> [[OP]]
 ;
 ; GCN-LABEL: select_sdiv_lhs_const_v2i32:
diff --git a/llvm/test/Transforms/InstCombine/apint-shift.ll b/llvm/test/Transforms/InstCombine/apint-shift.ll
index 908aeac0cea2..5a351efccfcc 100644
--- a/llvm/test/Transforms/InstCombine/apint-shift.ll
+++ b/llvm/test/Transforms/InstCombine/apint-shift.ll
@@ -337,7 +337,7 @@ define <2 x i1> @test16vec_nonuniform(<2 x i84> %X) {
 
 define <2 x i1> @test16vec_undef(<2 x i84> %X) {
 ; CHECK-LABEL: @test16vec_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], <i84 16, i84 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], <i84 16, i84 undef>
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i84> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
index 8d29372c3a72..ba0d32ee3768 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
@@ -418,7 +418,7 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) {
 
 define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef2(
-; CHECK-NEXT:    [[RET:%.*]] = and <3 x i8> [[X:%.*]], <i8 -8, i8 poison, i8 -8>
+; CHECK-NEXT:    [[RET:%.*]] = and <3 x i8> [[X:%.*]], <i8 -8, i8 undef, i8 -8>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %tmp0 = ashr <3 x i8> %x, <i8 3, i8 undef, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
index 40bc4aaab21c..445f6406b3d2 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
@@ -418,7 +418,7 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) {
 
 define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef2(
-; CHECK-NEXT:    [[RET:%.*]] = and <3 x i8> [[X:%.*]], <i8 -8, i8 poison, i8 -8>
+; CHECK-NEXT:    [[RET:%.*]] = and <3 x i8> [[X:%.*]], <i8 -8, i8 undef, i8 -8>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %tmp0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
index 45aa22aa808f..9de0b337de28 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -171,7 +171,7 @@ define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
 
 define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef2(
-; CHECK-NEXT:    [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 134217727, i32 poison, i32 134217727>
+; CHECK-NEXT:    [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 134217727, i32 undef, i32 134217727>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index b48466e678d8..5e6bed4e280f 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -2876,7 +2876,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform(<2 x i32> %x) {
 
 define <2 x i1> @icmp_and_or_lshr_cst_vec_undef(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 3, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 3, i32 -1>
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
@@ -2920,7 +2920,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) {
 define <2 x i1> @icmp_and_or_lshr_cst_vec_undef_commute(<2 x i32> %xp) {
 ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef_commute(
 ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X]], <i32 3, i32 poison>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X]], <i32 3, i32 -1>
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
index e49c381fcd16..89c16a0949e8 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
@@ -103,7 +103,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
-; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
@@ -138,7 +138,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
-; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 poison, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
index 20f38deeb0d5..8aef637c6a74 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
@@ -103,7 +103,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
-; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
@@ -138,7 +138,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
-; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 poison, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
index 562280391c5e..61f25e6ca0b1 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
@@ -83,7 +83,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -110,7 +110,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
index aa644e6264e4..077bb8296f3e 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
@@ -93,7 +93,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
-; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -124,7 +124,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
-; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
index f2aa2894e27a..961ea5e48416 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
@@ -83,7 +83,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -110,7 +110,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
index 882117fe3480..41a71aa98f40 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
@@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
@@ -109,7 +109,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 poison, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 poison>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
index e92875d79207..787135229148 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
@@ -82,7 +82,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
@@ -109,7 +109,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 poison, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 poison>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
index b8066cef2b40..c0959d9e1ac6 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
@@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, %nbits
@@ -81,7 +81,7 @@ define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %nbits
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
index 20b322c0b647..5e0f0be2b1ad 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
@@ -72,7 +72,7 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]]
-; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T4]]
 ;
   %t0 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, %nbits
@@ -95,7 +95,7 @@ define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]]
-; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison>
+; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T4]]
 ;
   %t0 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %nbits
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
index 46f5b0c2f213..2e335f0083c1 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
@@ -62,7 +62,7 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = shl <8 x i32> %x, %nbits
@@ -81,7 +81,7 @@ define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 poison, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef>
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = shl <8 x i32> %x, %nbits
diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
index c85bcba82e97..d9bef00b2f78 100644
--- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll
+++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
@@ -82,7 +82,7 @@ define <2 x i32> @and_lshr_and_vec_v2(<2 x i32> %arg) {
 
 define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) {
 ; CHECK-LABEL: @and_lshr_and_vec_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 3, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 3, i32 undef, i32 3>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32>
 ; CHECK-NEXT:    ret <3 x i32> [[TMP4]]
@@ -91,7 +91,6 @@ define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) {
   %tmp1 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
   %tmp2 = lshr <3 x i32> %arg, <i32 1, i32 undef, i32 1>
   %tmp3 = and <3 x i32> %tmp2, <i32 1, i32 undef, i32 1>
-  ; The second element of %tmp4 is poison because it is (undef ? poison : undef).
   %tmp4 = select <3 x i1> %tmp1, <3 x i32> %tmp3, <3 x i32> <i32 1, i32 undef, i32 1>
   ret <3 x i32> %tmp4
 }
@@ -223,7 +222,7 @@ define <2 x i32> @f_var0_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 
 define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) {
 ; CHECK-LABEL: @f_var0_vec_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], <i32 2, i32 poison, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], <i32 2, i32 undef, i32 2>
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32>
@@ -233,7 +232,6 @@ define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) {
   %tmp2 = icmp eq <3 x i32> %tmp, <i32 0, i32 undef, i32 0>
   %tmp3 = lshr <3 x i32> %arg, <i32 1, i32 undef, i32 1>
   %tmp4 = and <3 x i32> %tmp3, <i32 1, i32 undef, i32 1>
-  ; The second element of %tmp5 is poison because it is (undef ? poison : undef).
   %tmp5 = select <3 x i1> %tmp2, <3 x i32> %tmp4, <3 x i32> <i32 1, i32 undef, i32 1>
   ret <3 x i32> %tmp5
 }
diff --git a/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll
index 3232cdc49c0f..e968f13c40b0 100644
--- a/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/shift-add-inseltpoison.ll
@@ -40,7 +40,7 @@ define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
 define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
 ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
 ; CHECK-NEXT:    [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[B]]
+; CHECK-NEXT:    [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 undef, i32 -458752>, [[B]]
 ; CHECK-NEXT:    ret <4 x i32> [[D]]
 ;
   %B = zext <4 x i16> %A to <4 x i32>
@@ -52,7 +52,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
 define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
 ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
 ; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
-; CHECK-NEXT:    [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[B]]
+; CHECK-NEXT:    [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 undef, i32 -1>, [[B]]
 ; CHECK-NEXT:    ret <4 x i32> [[D]]
 ;
   %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
@@ -64,7 +64,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
 define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) {
 ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
 ; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
-; CHECK-NEXT:    [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[B]]
+; CHECK-NEXT:    [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 undef, i32 65535>, [[B]]
 ; CHECK-NEXT:    ret <4 x i32> [[D]]
 ;
   %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
@@ -78,7 +78,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) {
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[C]]
+; CHECK-NEXT:    [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 undef, i32 -458752>, [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
@@ -94,7 +94,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) {
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[C]]
+; CHECK-NEXT:    [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 undef, i32 -1>, [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
@@ -110,7 +110,7 @@ define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) {
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[C]]
+; CHECK-NEXT:    [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 undef, i32 65535>, [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll
index eea8b7a074d7..e227274f4930 100644
--- a/llvm/test/Transforms/InstCombine/shift-add.ll
+++ b/llvm/test/Transforms/InstCombine/shift-add.ll
@@ -40,7 +40,7 @@ define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
 define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
 ; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
 ; CHECK-NEXT:    [[B:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[B]]
+; CHECK-NEXT:    [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 undef, i32 -458752>, [[B]]
 ; CHECK-NEXT:    ret <4 x i32> [[D]]
 ;
   %B = zext <4 x i16> %A to <4 x i32>
@@ -52,7 +52,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
 define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
 ; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
 ; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
-; CHECK-NEXT:    [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[B]]
+; CHECK-NEXT:    [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 undef, i32 -1>, [[B]]
 ; CHECK-NEXT:    ret <4 x i32> [[D]]
 ;
   %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
@@ -64,7 +64,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
 define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) {
 ; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
 ; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 255, i32 65535>
-; CHECK-NEXT:    [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[B]]
+; CHECK-NEXT:    [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 undef, i32 65535>, [[B]]
 ; CHECK-NEXT:    ret <4 x i32> [[D]]
 ;
   %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
@@ -78,7 +78,7 @@ define <4 x i32> @shl_C1_add_A_C2_v4i32_splat(i16 %I) {
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 poison, i32 -458752>, [[C]]
+; CHECK-NEXT:    [[E:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 undef, i32 -458752>, [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
@@ -94,7 +94,7 @@ define <4 x i32> @ashr_C1_add_A_C2_v4i32_splat(i16 %I) {
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 poison, i32 -1>, [[C]]
+; CHECK-NEXT:    [[E:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 undef, i32 -1>, [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
@@ -110,7 +110,7 @@ define <4 x i32> @lshr_C1_add_A_C2_v4i32_splat(i16 %I) {
 ; CHECK-NEXT:    [[A:%.*]] = zext i16 [[I:%.*]] to i32
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <4 x i32> undef, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 poison, i32 65535>, [[C]]
+; CHECK-NEXT:    [[E:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 undef, i32 65535>, [[C]]
 ; CHECK-NEXT:    ret <4 x i32> [[E]]
 ;
   %A = zext i16 %I to i32
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll
index 54b862c8514a..197e7cc0ac75 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement-inseltpoison.ll
@@ -25,7 +25,7 @@ define <4 x i64> @insertelement() {
 
 define <4 x i64> @insertelement_undef() {
 ; CHECK-LABEL: @insertelement_undef(
-; CHECK-NEXT:    ret <4 x i64> poison
+; CHECK-NEXT:    ret <4 x i64> undef
 ;
   %vec1 = insertelement <4 x i64> poison, i64 -1, i32 0
   %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
index 127c1692b5b8..a9a27a5df01f 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/InsertElement.ll
@@ -25,7 +25,7 @@ define <4 x i64> @insertelement() {
 
 define <4 x i64> @insertelement_undef() {
 ; CHECK-LABEL: @insertelement_undef(
-; CHECK-NEXT:    ret <4 x i64> poison
+; CHECK-NEXT:    ret <4 x i64> undef
 ;
   %vec1 = insertelement <4 x i64> undef, i64 -1, i32 0
   %vec2 = insertelement <4 x i64> %vec1, i64 -2, i32 1
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
index 1136151f7157..adf5e4b68a1b 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
@@ -5,7 +5,7 @@
 
 define i8 @overflow_fptosi() {
 ; CHECK-LABEL: @overflow_fptosi(
-; CHECK-NEXT:    ret i8 poison
+; CHECK-NEXT:    ret i8 undef
 ;
   %i = fptosi double 1.56e+02 to i8
   ret i8 %i
@@ -13,7 +13,7 @@ define i8 @overflow_fptosi() {
 
 define i8 @overflow_fptoui() {
 ; CHECK-LABEL: @overflow_fptoui(
-; CHECK-NEXT:    ret i8 poison
+; CHECK-NEXT:    ret i8 undef
 ;
   %i = fptoui double 2.56e+02 to i8
   ret i8 %i
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll
index f3fe29ff57ba..ea34bb4699e6 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/poison.ll
@@ -104,14 +104,14 @@ define void @vec_aggr_ops() {
 
 define void @other_ops(i8 %x) {
 ; CHECK-LABEL: @other_ops(
-; CHECK-NEXT:    call void (...) @use(i1 poison, i1 poison, i8 poison, i8 poison, i8* poison, i8* poison)
+; CHECK-NEXT:    call void (...) @use(i1 poison, i1 poison, i8 poison, i8 poison, i8* poison)
 ; CHECK-NEXT:    ret void
 ;
   %i1 = icmp eq i8 poison, 1
   %i2 = fcmp oeq float poison, 1.0
   %i3 = select i1 poison, i8 1, i8 2
   %i4 = select i1 true, i8 poison, i8 %x
-  call void (...) @use(i1 %i1, i1 %i2, i8 %i3, i8 %i4, i8* getelementptr (i8, i8* poison, i64 1), i8* getelementptr inbounds (i8, i8* undef, i64 1))
+  call void (...) @use(i1 %i1, i1 %i2, i8 %i3, i8 %i4, i8* getelementptr (i8, i8* poison, i64 1))
   ret void
 }
 
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll b/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll
index a7a60e562117..3e64513533ff 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/shift.ll
@@ -3,15 +3,15 @@
 ; CHECK-LABEL: shift_undef_64
 define void @shift_undef_64(i64* %p) {
   %r1 = lshr i64 -1, 4294967296 ; 2^32
-  ; CHECK: store i64 poison
+  ; CHECK: store i64 undef
   store i64 %r1, i64* %p
 
   %r2 = ashr i64 -1, 4294967297 ; 2^32 + 1
-  ; CHECK: store i64 poison
+  ; CHECK: store i64 undef
   store i64 %r2, i64* %p
 
   %r3 = shl i64 -1, 4294967298 ; 2^32 + 2
-  ; CHECK: store i64 poison
+  ; CHECK: store i64 undef
   store i64 %r3, i64* %p
 
   ret void
@@ -20,15 +20,15 @@ define void @shift_undef_64(i64* %p) {
 ; CHECK-LABEL: shift_undef_65
 define void @shift_undef_65(i65* %p) {
   %r1 = lshr i65 2, 18446744073709551617
-  ; CHECK: store i65 poison
+  ; CHECK: store i65 undef
   store i65 %r1, i65* %p
 
   %r2 = ashr i65 4, 18446744073709551617
-  ; CHECK: store i65 poison
+  ; CHECK: store i65 undef
   store i65 %r2, i65* %p
 
   %r3 = shl i65 1, 18446744073709551617
-  ; CHECK: store i65 poison
+  ; CHECK: store i65 undef
   store i65 %r3, i65* %p
 
   ret void
@@ -37,15 +37,15 @@ define void @shift_undef_65(i65* %p) {
 ; CHECK-LABEL: shift_undef_256
 define void @shift_undef_256(i256* %p) {
   %r1 = lshr i256 2, 18446744073709551617
-  ; CHECK: store i256 poison
+  ; CHECK: store i256 undef
   store i256 %r1, i256* %p
 
   %r2 = ashr i256 4, 18446744073709551618
-  ; CHECK: store i256 poison
+  ; CHECK: store i256 undef
   store i256 %r2, i256* %p
 
   %r3 = shl i256 1, 18446744073709551619
-  ; CHECK: store i256 poison
+  ; CHECK: store i256 undef
   store i256 %r3, i256* %p
 
   ret void
@@ -54,15 +54,15 @@ define void @shift_undef_256(i256* %p) {
 ; CHECK-LABEL: shift_undef_511
 define void @shift_undef_511(i511* %p) {
   %r1 = lshr i511 -1, 1208925819614629174706276 ; 2^80 + 100
-  ; CHECK: store i511 poison
+  ; CHECK: store i511 undef
   store i511 %r1, i511* %p
 
   %r2 = ashr i511 -2, 1208925819614629174706200
-  ; CHECK: store i511 poison
+  ; CHECK: store i511 undef
   store i511 %r2, i511* %p
 
   %r3 = shl i511 -3, 1208925819614629174706180
-  ; CHECK: store i511 poison
+  ; CHECK: store i511 undef
   store i511 %r3, i511* %p
 
   ret void
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll
index 6ce03dd2e0f0..2762291d7954 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll
@@ -5,7 +5,7 @@
 
 define <3 x i8> @shl() {
 ; CHECK-LABEL: @shl(
-; CHECK-NEXT:    ret <3 x i8> <i8 poison, i8 0, i8 0>
+; CHECK-NEXT:    ret <3 x i8> <i8 undef, i8 0, i8 0>
 ;
   %c = shl <3 x i8> undef, <i8 undef, i8 4, i8 1>
   ret <3 x i8> %c
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll
index 99cc2527d12e..5d0f484bc3fd 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts.ll
@@ -5,7 +5,7 @@
 
 define <3 x i8> @shl() {
 ; CHECK-LABEL: @shl(
-; CHECK-NEXT:    ret <3 x i8> <i8 poison, i8 0, i8 0>
+; CHECK-NEXT:    ret <3 x i8> <i8 undef, i8 0, i8 0>
 ;
   %c = shl <3 x i8> undef, <i8 undef, i8 4, i8 1>
   ret <3 x i8> %c
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
index 9689887be69b..ee19e617748b 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale-inseltpoison.ll
@@ -75,7 +75,7 @@ define <vscale x 4 x float> @fmul() {
 
 define <vscale x 4 x i32> @udiv() {
 ; CHECK-LABEL: @udiv(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = udiv <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -83,7 +83,7 @@ define <vscale x 4 x i32> @udiv() {
 
 define <vscale x 4 x i32> @udiv_splat_zero() {
 ; CHECK-LABEL: @udiv_splat_zero(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = udiv <vscale x 4 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 4 x i32> %r
@@ -91,7 +91,7 @@ define <vscale x 4 x i32> @udiv_splat_zero() {
 
 define <vscale x 4 x i32> @sdiv() {
 ; CHECK-LABEL: @sdiv(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = sdiv <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -107,7 +107,7 @@ define <vscale x 4 x float> @fdiv() {
 
 define <vscale x 4 x i32> @urem() {
 ; CHECK-LABEL: @urem(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = urem <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -115,7 +115,7 @@ define <vscale x 4 x i32> @urem() {
 
 define <vscale x 4 x i32> @srem() {
 ; CHECK-LABEL: @srem(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = srem <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -135,7 +135,7 @@ define <vscale x 4 x float> @frem() {
 
 define <vscale x 4 x i32> @shl() {
 ; CHECK-LABEL: @shl(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = shl <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -143,7 +143,7 @@ define <vscale x 4 x i32> @shl() {
 
 define <vscale x 4 x i32> @lshr() {
 ; CHECK-LABEL: @lshr(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = lshr <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -151,7 +151,7 @@ define <vscale x 4 x i32> @lshr() {
 
 define <vscale x 4 x i32> @ashr() {
 ; CHECK-LABEL: @ashr(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = ashr <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
index 048e8840ffd8..66e4c93e1968 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vscale.ll
@@ -75,7 +75,7 @@ define <vscale x 4 x float> @fmul() {
 
 define <vscale x 4 x i32> @udiv() {
 ; CHECK-LABEL: @udiv(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = udiv <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -83,7 +83,7 @@ define <vscale x 4 x i32> @udiv() {
 
 define <vscale x 4 x i32> @udiv_splat_zero() {
 ; CHECK-LABEL: @udiv_splat_zero(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = udiv <vscale x 4 x i32> zeroinitializer, zeroinitializer
   ret <vscale x 4 x i32> %r
@@ -91,7 +91,7 @@ define <vscale x 4 x i32> @udiv_splat_zero() {
 
 define <vscale x 4 x i32> @sdiv() {
 ; CHECK-LABEL: @sdiv(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = sdiv <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -107,7 +107,7 @@ define <vscale x 4 x float> @fdiv() {
 
 define <vscale x 4 x i32> @urem() {
 ; CHECK-LABEL: @urem(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = urem <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -115,7 +115,7 @@ define <vscale x 4 x i32> @urem() {
 
 define <vscale x 4 x i32> @srem() {
 ; CHECK-LABEL: @srem(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = srem <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -135,7 +135,7 @@ define <vscale x 4 x float> @frem() {
 
 define <vscale x 4 x i32> @shl() {
 ; CHECK-LABEL: @shl(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = shl <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -143,7 +143,7 @@ define <vscale x 4 x i32> @shl() {
 
 define <vscale x 4 x i32> @lshr() {
 ; CHECK-LABEL: @lshr(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = lshr <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
@@ -151,7 +151,7 @@ define <vscale x 4 x i32> @lshr() {
 
 define <vscale x 4 x i32> @ashr() {
 ; CHECK-LABEL: @ashr(
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
 ;
   %r = ashr <vscale x 4 x i32> undef, undef
   ret <vscale x 4 x i32> %r
diff --git a/llvm/test/Transforms/InstSimplify/div.ll b/llvm/test/Transforms/InstSimplify/div.ll
index 7c8efc27d3aa..5a3e6e8f7daa 100644
--- a/llvm/test/Transforms/InstSimplify/div.ll
+++ b/llvm/test/Transforms/InstSimplify/div.ll
@@ -25,11 +25,11 @@ define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) {
   ret <2 x i32> %B
 }
 
-; Division-by-zero is poison. UB in any vector lane means the whole op is poison.
+; Division-by-zero is undef. UB in any vector lane means the whole op is undef.
 
 define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_zero_elt_vec_constfold(
-; CHECK-NEXT:    ret <2 x i8> poison
+; CHECK-NEXT:    ret <2 x i8> undef
 ;
   %div = sdiv <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
   ret <2 x i8> %div
@@ -37,7 +37,7 @@ define <2 x i8> @sdiv_zero_elt_vec_constfold(<2 x i8> %x) {
 
 define <2 x i8> @udiv_zero_elt_vec_constfold(<2 x i8> %x) {
 ; CHECK-LABEL: @udiv_zero_elt_vec_constfold(
-; CHECK-NEXT:    ret <2 x i8> poison
+; CHECK-NEXT:    ret <2 x i8> undef
 ;
   %div = udiv <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
   ret <2 x i8> %div
@@ -193,37 +193,4 @@ define i32 @div1() {
   ret i32 %urem
 }
 
-define i8 @sdiv_minusone_divisor() {
-; CHECK-LABEL: @sdiv_minusone_divisor(
-; CHECK-NEXT:    ret i8 poison
-;
-  %v = sdiv i8 -128, -1
-  ret i8 %v
-}
-
-define i32 @poison(i32 %x) {
-; CHECK-LABEL: @poison(
-; CHECK-NEXT:    ret i32 poison
-;
-  %v = udiv i32 %x, poison
-  ret i32 %v
-}
-
-; TODO: this should be poison
-define i32 @poison2(i32 %x) {
-; CHECK-LABEL: @poison2(
-; CHECK-NEXT:    ret i32 0
-;
-  %v = udiv i32 poison, %x
-  ret i32 %v
-}
-
-define <2 x i32> @poison3(<2 x i32> %x) {
-; CHECK-LABEL: @poison3(
-; CHECK-NEXT:    ret <2 x i32> poison
-;
-  %v = udiv <2 x i32> %x, <i32 poison, i32 1>
-  ret <2 x i32> %v
-}
-
 !0 = !{i32 0, i32 3}
diff --git a/llvm/test/Transforms/InstSimplify/rem.ll b/llvm/test/Transforms/InstSimplify/rem.ll
index 6aaeb5c70d00..6ccb6474ce44 100644
--- a/llvm/test/Transforms/InstSimplify/rem.ll
+++ b/llvm/test/Transforms/InstSimplify/rem.ll
@@ -25,11 +25,11 @@ define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) {
   ret <2 x i32> %B
 }
 
-; Division-by-zero is poison. UB in any vector lane means the whole op is poison.
+; Division-by-zero is undef. UB in any vector lane means the whole op is undef.
 
 define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) {
 ; CHECK-LABEL: @srem_zero_elt_vec_constfold(
-; CHECK-NEXT:    ret <2 x i8> poison
+; CHECK-NEXT:    ret <2 x i8> undef
 ;
   %rem = srem <2 x i8> <i8 1, i8 2>, <i8 0, i8 -42>
   ret <2 x i8> %rem
@@ -37,7 +37,7 @@ define <2 x i8> @srem_zero_elt_vec_constfold(<2 x i8> %x) {
 
 define <2 x i8> @urem_zero_elt_vec_constfold(<2 x i8> %x) {
 ; CHECK-LABEL: @urem_zero_elt_vec_constfold(
-; CHECK-NEXT:    ret <2 x i8> poison
+; CHECK-NEXT:    ret <2 x i8> undef
 ;
   %rem = urem <2 x i8> <i8 1, i8 2>, <i8 42, i8 0>
   ret <2 x i8> %rem
@@ -325,28 +325,3 @@ define <2 x i32> @srem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i32> %y) {
   ret <2 x i32> %r
 }
 
-define i8 @srem_minusone_divisor() {
-; CHECK-LABEL: @srem_minusone_divisor(
-; CHECK-NEXT:    ret i8 poison
-;
-  %v = srem i8 -128, -1
-  ret i8 %v
-}
-
-define i32 @poison(i32 %x) {
-; CHECK-LABEL: @poison(
-; CHECK-NEXT:    ret i32 poison
-;
-  %v = urem i32 %x, poison
-  ret i32 %v
-}
-
-; TODO: this should be poison
-
-define i32 @poison2(i32 %x) {
-; CHECK-LABEL: @poison2(
-; CHECK-NEXT:    ret i32 0
-;
-  %v = urem i32 poison, %x
-  ret i32 %v
-}
diff --git a/llvm/test/Transforms/InstSimplify/undef.ll b/llvm/test/Transforms/InstSimplify/undef.ll
index d09dc43da091..fe1f412d3d37 100644
--- a/llvm/test/Transforms/InstSimplify/undef.ll
+++ b/llvm/test/Transforms/InstSimplify/undef.ll
@@ -1,9 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -instsimplify -S < %s | FileCheck %s
 
 define i64 @test0() {
 ; CHECK-LABEL: @test0(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = mul i64 undef, undef
   ret i64 %r
@@ -11,7 +10,7 @@ define i64 @test0() {
 
 define i64 @test1() {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = mul i64 3, undef
   ret i64 %r
@@ -19,7 +18,7 @@ define i64 @test1() {
 
 define i64 @test2() {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = mul i64 undef, 3
   ret i64 %r
@@ -27,7 +26,7 @@ define i64 @test2() {
 
 define i64 @test3() {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT:    ret i64 0
+; CHECK:         ret i64 0
 ;
   %r = mul i64 undef, 6
   ret i64 %r
@@ -35,7 +34,7 @@ define i64 @test3() {
 
 define i64 @test4() {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    ret i64 0
+; CHECK:         ret i64 0
 ;
   %r = mul i64 6, undef
   ret i64 %r
@@ -43,7 +42,7 @@ define i64 @test4() {
 
 define i64 @test5() {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = and i64 undef, undef
   ret i64 %r
@@ -51,7 +50,7 @@ define i64 @test5() {
 
 define i64 @test6() {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = or i64 undef, undef
   ret i64 %r
@@ -59,7 +58,7 @@ define i64 @test6() {
 
 define i64 @test7() {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = udiv i64 undef, 1
   ret i64 %r
@@ -67,7 +66,7 @@ define i64 @test7() {
 
 define i64 @test8() {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = sdiv i64 undef, 1
   ret i64 %r
@@ -75,7 +74,7 @@ define i64 @test8() {
 
 define i64 @test9() {
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT:    ret i64 0
+; CHECK:         ret i64 0
 ;
   %r = urem i64 undef, 1
   ret i64 %r
@@ -83,7 +82,7 @@ define i64 @test9() {
 
 define i64 @test10() {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    ret i64 0
+; CHECK:         ret i64 0
 ;
   %r = srem i64 undef, 1
   ret i64 %r
@@ -91,7 +90,7 @@ define i64 @test10() {
 
 define i64 @test11() {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    ret i64 poison
+; CHECK:         ret i64 undef
 ;
   %r = shl i64 undef, undef
   ret i64 %r
@@ -99,7 +98,7 @@ define i64 @test11() {
 
 define i64 @test11b(i64 %a) {
 ; CHECK-LABEL: @test11b(
-; CHECK-NEXT:    ret i64 poison
+; CHECK:         ret i64 poison
 ;
   %r = shl i64 %a, undef
   ret i64 %r
@@ -107,7 +106,7 @@ define i64 @test11b(i64 %a) {
 
 define i64 @test12() {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT:    ret i64 poison
+; CHECK:         ret i64 undef
 ;
   %r = ashr i64 undef, undef
   ret i64 %r
@@ -115,7 +114,7 @@ define i64 @test12() {
 
 define i64 @test12b(i64 %a) {
 ; CHECK-LABEL: @test12b(
-; CHECK-NEXT:    ret i64 poison
+; CHECK:         ret i64 poison
 ;
   %r = ashr i64 %a, undef
   ret i64 %r
@@ -123,7 +122,7 @@ define i64 @test12b(i64 %a) {
 
 define i64 @test13() {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT:    ret i64 poison
+; CHECK:         ret i64 undef
 ;
   %r = lshr i64 undef, undef
   ret i64 %r
@@ -131,7 +130,7 @@ define i64 @test13() {
 
 define i64 @test13b(i64 %a) {
 ; CHECK-LABEL: @test13b(
-; CHECK-NEXT:    ret i64 poison
+; CHECK:         ret i64 poison
 ;
   %r = lshr i64 %a, undef
   ret i64 %r
@@ -139,7 +138,7 @@ define i64 @test13b(i64 %a) {
 
 define i1 @test14() {
 ; CHECK-LABEL: @test14(
-; CHECK-NEXT:    ret i1 undef
+; CHECK:         ret i1 undef
 ;
   %r = icmp slt i64 undef, undef
   ret i1 %r
@@ -147,7 +146,7 @@ define i1 @test14() {
 
 define i1 @test15() {
 ; CHECK-LABEL: @test15(
-; CHECK-NEXT:    ret i1 undef
+; CHECK:         ret i1 undef
 ;
   %r = icmp ult i64 undef, undef
   ret i1 %r
@@ -155,7 +154,7 @@ define i1 @test15() {
 
 define i64 @test16(i64 %a) {
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = select i1 undef, i64 %a, i64 undef
   ret i64 %r
@@ -163,7 +162,7 @@ define i64 @test16(i64 %a) {
 
 define i64 @test17(i64 %a) {
 ; CHECK-LABEL: @test17(
-; CHECK-NEXT:    ret i64 undef
+; CHECK:         ret i64 undef
 ;
   %r = select i1 undef, i64 undef, i64 %a
   ret i64 %r
@@ -171,7 +170,7 @@ define i64 @test17(i64 %a) {
 
 define i64 @test18(i64 %a) {
 ; CHECK-LABEL: @test18(
-; CHECK-NEXT:    [[R:%.*]] = call i64 undef(i64 [[A:%.*]])
+; CHECK:         [[R:%.*]] = call i64 undef(i64 %a)
 ; CHECK-NEXT:    ret i64 poison
 ;
   %r = call i64 (i64) undef(i64 %a)
@@ -180,7 +179,7 @@ define i64 @test18(i64 %a) {
 
 define <4 x i8> @test19(<4 x i8> %a) {
 ; CHECK-LABEL: @test19(
-; CHECK-NEXT:    ret <4 x i8> poison
+; CHECK:         ret <4 x i8> poison
 ;
   %b = shl <4 x i8> %a, <i8 8, i8 9, i8 undef, i8 -1>
   ret <4 x i8> %b
@@ -188,7 +187,7 @@ define <4 x i8> @test19(<4 x i8> %a) {
 
 define i32 @test20(i32 %a) {
 ; CHECK-LABEL: @test20(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 poison
 ;
   %b = udiv i32 %a, 0
   ret i32 %b
@@ -204,7 +203,7 @@ define <2 x i32> @test20vec(<2 x i32> %a) {
 
 define i32 @test21(i32 %a) {
 ; CHECK-LABEL: @test21(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 poison
 ;
   %b = sdiv i32 %a, 0
   ret i32 %b
@@ -220,7 +219,7 @@ define <2 x i32> @test21vec(<2 x i32> %a) {
 
 define i32 @test22(i32 %a) {
 ; CHECK-LABEL: @test22(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = ashr exact i32 undef, %a
   ret i32 %b
@@ -228,7 +227,7 @@ define i32 @test22(i32 %a) {
 
 define i32 @test23(i32 %a) {
 ; CHECK-LABEL: @test23(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = lshr exact i32 undef, %a
   ret i32 %b
@@ -236,7 +235,7 @@ define i32 @test23(i32 %a) {
 
 define i32 @test24() {
 ; CHECK-LABEL: @test24(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 undef
 ;
   %b = udiv i32 undef, 0
   ret i32 %b
@@ -244,7 +243,7 @@ define i32 @test24() {
 
 define i32 @test25() {
 ; CHECK-LABEL: @test25(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 undef
 ;
   %b = lshr i32 0, undef
   ret i32 %b
@@ -252,7 +251,7 @@ define i32 @test25() {
 
 define i32 @test26() {
 ; CHECK-LABEL: @test26(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 undef
 ;
   %b = ashr i32 0, undef
   ret i32 %b
@@ -260,7 +259,7 @@ define i32 @test26() {
 
 define i32 @test27() {
 ; CHECK-LABEL: @test27(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 undef
 ;
   %b = shl i32 0, undef
   ret i32 %b
@@ -268,7 +267,7 @@ define i32 @test27() {
 
 define i32 @test28(i32 %a) {
 ; CHECK-LABEL: @test28(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = shl nsw i32 undef, %a
   ret i32 %b
@@ -276,7 +275,7 @@ define i32 @test28(i32 %a) {
 
 define i32 @test29(i32 %a) {
 ; CHECK-LABEL: @test29(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = shl nuw i32 undef, %a
   ret i32 %b
@@ -284,7 +283,7 @@ define i32 @test29(i32 %a) {
 
 define i32 @test30(i32 %a) {
 ; CHECK-LABEL: @test30(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = shl nsw nuw i32 undef, %a
   ret i32 %b
@@ -292,7 +291,7 @@ define i32 @test30(i32 %a) {
 
 define i32 @test31(i32 %a) {
 ; CHECK-LABEL: @test31(
-; CHECK-NEXT:    ret i32 0
+; CHECK:         ret i32 0
 ;
   %b = shl i32 undef, %a
   ret i32 %b
@@ -300,7 +299,7 @@ define i32 @test31(i32 %a) {
 
 define i32 @test32(i32 %a) {
 ; CHECK-LABEL: @test32(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = shl i32 undef, 0
   ret i32 %b
@@ -308,7 +307,7 @@ define i32 @test32(i32 %a) {
 
 define i32 @test33(i32 %a) {
 ; CHECK-LABEL: @test33(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = ashr i32 undef, 0
   ret i32 %b
@@ -316,7 +315,7 @@ define i32 @test33(i32 %a) {
 
 define i32 @test34(i32 %a) {
 ; CHECK-LABEL: @test34(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = lshr i32 undef, 0
   ret i32 %b
@@ -324,7 +323,7 @@ define i32 @test34(i32 %a) {
 
 define i32 @test35(<4 x i32> %V) {
 ; CHECK-LABEL: @test35(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 poison
 ;
   %b = extractelement <4 x i32> %V, i32 4
   ret i32 %b
@@ -332,7 +331,7 @@ define i32 @test35(<4 x i32> %V) {
 
 define i32 @test36(i32 %V) {
 ; CHECK-LABEL: @test36(
-; CHECK-NEXT:    ret i32 undef
+; CHECK:         ret i32 undef
 ;
   %b = extractelement <4 x i32> undef, i32 %V
   ret i32 %b
@@ -340,7 +339,7 @@ define i32 @test36(i32 %V) {
 
 define i32 @test37() {
 ; CHECK-LABEL: @test37(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 undef
 ;
   %b = udiv i32 undef, undef
   ret i32 %b
@@ -348,7 +347,7 @@ define i32 @test37() {
 
 define i32 @test38(i32 %a) {
 ; CHECK-LABEL: @test38(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 poison
 ;
   %b = udiv i32 %a, undef
   ret i32 %b
@@ -356,7 +355,7 @@ define i32 @test38(i32 %a) {
 
 define i32 @test39() {
 ; CHECK-LABEL: @test39(
-; CHECK-NEXT:    ret i32 poison
+; CHECK:         ret i32 undef
 ;
   %b = udiv i32 0, undef
   ret i32 %b
diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll
index 915ae546beda..6bf2a7718658 100644
--- a/llvm/test/Transforms/SROA/phi-gep.ll
+++ b/llvm/test/Transforms/SROA/phi-gep.ll
@@ -348,7 +348,7 @@ define void @test_sroa_gep_phi_select_same_block() {
 ; CHECK-NEXT:    [[PHI:%.*]] = phi %pair* [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ]
 ; CHECK-NEXT:    [[SELECT]] = select i1 undef, %pair* [[PHI]], %pair* undef
 ; CHECK-NEXT:    [[PHI_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], %pair* [[PHI]], i64 1
-; CHECK-NEXT:    [[SELECT_SROA_SEL:%.*]] = select i1 undef, %pair* [[PHI_SROA_GEP]], %pair* poison
+; CHECK-NEXT:    [[SELECT_SROA_SEL:%.*]] = select i1 undef, %pair* [[PHI_SROA_GEP]], %pair* undef
 ; CHECK-NEXT:    br i1 undef, label [[EXIT:%.*]], label [[WHILE_BODY]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    unreachable
diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll
index f69cfeb410bd..93cb3420d0af 100644
--- a/llvm/test/Transforms/SROA/select-gep.ll
+++ b/llvm/test/Transforms/SROA/select-gep.ll
@@ -83,7 +83,7 @@ define i32 @test_sroa_select_gep_undef(i1 %cond) {
 ; CHECK-LABEL: @test_sroa_select_gep_undef(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* poison
+; CHECK-NEXT:    [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], i32* [[A_SROA_0]], i32* undef
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[SELECT_SROA_SEL]], align 4
 ; CHECK-NEXT:    ret i32 [[LOAD]]
 ;
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
index 8a6b1e98c968..b9d82e9f81df 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-inseltpoison.ll
@@ -128,7 +128,7 @@ define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) {
 define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_udiv(
 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 6, i64 poison>, i64 [[R_SCALAR]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 6, i64 undef>, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i32 1
@@ -143,7 +143,7 @@ define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) {
 define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_urem(
 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[R_SCALAR]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i64 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
index 0637b5005683..a400e8f42907 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
@@ -131,7 +131,7 @@ define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) {
 define <2 x i64> @shl_constant_op0(i64 %x) {
 ; CHECK-LABEL: @shl_constant_op0(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -142,7 +142,7 @@ define <2 x i64> @shl_constant_op0(i64 %x) {
 define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @shl_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -189,7 +189,7 @@ define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) {
 define <2 x i64> @shl_constant_op1(i64 %x) {
 ; CHECK-LABEL: @shl_constant_op1(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -224,7 +224,7 @@ define <2 x i64> @shl_constant_op1_load(i64* %p) {
 define <2 x i64> @ashr_constant_op0(i64 %x) {
 ; CHECK-LABEL: @ashr_constant_op0(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -235,7 +235,7 @@ define <2 x i64> @ashr_constant_op0(i64 %x) {
 define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @ashr_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -246,7 +246,7 @@ define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) {
 define <2 x i64> @ashr_constant_op1(i64 %x) {
 ; CHECK-LABEL: @ashr_constant_op1(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -268,7 +268,7 @@ define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) {
 define <2 x i64> @lshr_constant_op0(i64 %x) {
 ; CHECK-LABEL: @lshr_constant_op0(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -279,7 +279,7 @@ define <2 x i64> @lshr_constant_op0(i64 %x) {
 define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @lshr_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -290,7 +290,7 @@ define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) {
 define <2 x i64> @lshr_constant_op1(i64 %x) {
 ; CHECK-LABEL: @lshr_constant_op1(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -312,7 +312,7 @@ define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) {
 define <2 x i64> @urem_constant_op0(i64 %x) {
 ; CHECK-LABEL: @urem_constant_op0(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -323,7 +323,7 @@ define <2 x i64> @urem_constant_op0(i64 %x) {
 define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @urem_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -334,7 +334,7 @@ define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) {
 define <2 x i64> @urem_constant_op1(i64 %x) {
 ; CHECK-LABEL: @urem_constant_op1(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -356,7 +356,7 @@ define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) {
 define <2 x i64> @srem_constant_op0(i64 %x) {
 ; CHECK-LABEL: @srem_constant_op0(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -367,7 +367,7 @@ define <2 x i64> @srem_constant_op0(i64 %x) {
 define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @srem_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -378,7 +378,7 @@ define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) {
 define <2 x i64> @srem_constant_op1(i64 %x) {
 ; CHECK-LABEL: @srem_constant_op1(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -400,7 +400,7 @@ define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) {
 define <2 x i64> @udiv_constant_op0(i64 %x) {
 ; CHECK-LABEL: @udiv_constant_op0(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -411,7 +411,7 @@ define <2 x i64> @udiv_constant_op0(i64 %x) {
 define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @udiv_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -422,7 +422,7 @@ define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) {
 define <2 x i64> @udiv_constant_op1(i64 %x) {
 ; CHECK-LABEL: @udiv_constant_op1(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -444,7 +444,7 @@ define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) {
 define <2 x i64> @sdiv_constant_op0(i64 %x) {
 ; CHECK-LABEL: @sdiv_constant_op0(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -455,7 +455,7 @@ define <2 x i64> @sdiv_constant_op0(i64 %x) {
 define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
@@ -466,7 +466,7 @@ define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) {
 define <2 x i64> @sdiv_constant_op1(i64 %x) {
 ; CHECK-LABEL: @sdiv_constant_op1(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
index 4fd33cc7ef28..abebf4d809af 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop.ll
@@ -53,7 +53,7 @@ define <2 x i64> @ins1_ins1_iterate(i64 %w, i64 %x, i64 %y, i64 %z) {
 ; CHECK-NEXT:    [[S0_SCALAR:%.*]] = sub i64 [[W:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    [[S1_SCALAR:%.*]] = or i64 [[S0_SCALAR]], [[Y:%.*]]
 ; CHECK-NEXT:    [[S2_SCALAR:%.*]] = shl i64 [[Z:%.*]], [[S1_SCALAR]]
-; CHECK-NEXT:    [[S2:%.*]] = insertelement <2 x i64> poison, i64 [[S2_SCALAR]], i64 1
+; CHECK-NEXT:    [[S2:%.*]] = insertelement <2 x i64> undef, i64 [[S2_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[S2]]
 ;
   %i0 = insertelement <2 x i64> undef, i64 %w, i64 1
@@ -128,7 +128,7 @@ define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) {
 define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_udiv(
 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 6, i64 poison>, i64 [[R_SCALAR]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 6, i64 undef>, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i32 1
@@ -143,7 +143,7 @@ define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) {
 define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_urem(
 ; CHECK-NEXT:    [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[R_SCALAR]], i64 1
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i64 1
diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp
index 9eabc7c55638..96d3672647e8 100644
--- a/llvm/unittests/IR/ConstantsTest.cpp
+++ b/llvm/unittests/IR/ConstantsTest.cpp
@@ -27,7 +27,7 @@ TEST(ConstantsTest, Integer_i1) {
   Constant* Zero = ConstantInt::get(Int1, 0);
   Constant* NegOne = ConstantInt::get(Int1, static_cast<uint64_t>(-1), true);
   EXPECT_EQ(NegOne, ConstantInt::getSigned(Int1, -1));
-  Constant* Poison = PoisonValue::get(Int1);
+  Constant* Undef = UndefValue::get(Int1);
 
   // Input:  @b = constant i1 add(i1 1 , i1 1)
   // Output: @b = constant i1 false
@@ -53,21 +53,21 @@ TEST(ConstantsTest, Integer_i1) {
   // @g = constant i1 false
   EXPECT_EQ(Zero, ConstantExpr::getSub(One, One));
 
-  // @h = constant i1 shl(i1 1 , i1 1)  ; poison
-  // @h = constant i1 poison
-  EXPECT_EQ(Poison, ConstantExpr::getShl(One, One));
+  // @h = constant i1 shl(i1 1 , i1 1)  ; undefined
+  // @h = constant i1 undef
+  EXPECT_EQ(Undef, ConstantExpr::getShl(One, One));
 
   // @i = constant i1 shl(i1 1 , i1 0)
   // @i = constant i1 true
   EXPECT_EQ(One, ConstantExpr::getShl(One, Zero));
 
-  // @j = constant i1 lshr(i1 1, i1 1)  ; poison
-  // @j = constant i1 poison
-  EXPECT_EQ(Poison, ConstantExpr::getLShr(One, One));
+  // @j = constant i1 lshr(i1 1, i1 1)  ; undefined
+  // @j = constant i1 undef
+  EXPECT_EQ(Undef, ConstantExpr::getLShr(One, One));
 
-  // @m = constant i1 ashr(i1 1, i1 1)  ; poison
-  // @m = constant i1 poison
-  EXPECT_EQ(Poison, ConstantExpr::getAShr(One, One));
+  // @m = constant i1 ashr(i1 1, i1 1)  ; undefined
+  // @m = constant i1 undef
+  EXPECT_EQ(Undef, ConstantExpr::getAShr(One, One));
 
   // @n = constant i1 mul(i1 -1, i1 1)
   // @n = constant i1 true
@@ -218,6 +218,7 @@ TEST(ConstantsTest, AsInstructionsTest) {
   Constant *Elt = ConstantInt::get(Int16Ty, 2015);
   Constant *Poison16 = PoisonValue::get(Int16Ty);
   Constant *Undef64  = UndefValue::get(Int64Ty);
+  Constant *UndefV16 = UndefValue::get(P6->getType());
   Constant *PoisonV16 = PoisonValue::get(P6->getType());
 
   #define P0STR "ptrtoint (i32** @dummy to i32)"
@@ -294,8 +295,8 @@ TEST(ConstantsTest, AsInstructionsTest) {
 
   EXPECT_EQ(Elt, ConstantExpr::getExtractElement(
                  ConstantExpr::getInsertElement(P6, Elt, One), One));
-  EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Two));
-  EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Big));
+  EXPECT_EQ(UndefV16, ConstantExpr::getInsertElement(P6, Elt, Two));
+  EXPECT_EQ(UndefV16, ConstantExpr::getInsertElement(P6, Elt, Big));
   EXPECT_EQ(PoisonV16, ConstantExpr::getInsertElement(P6, Elt, Undef64));
 }
 

From 91f34dabb92d8446142b3c5777fa83e6bcbdfa7e Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 2 Feb 2021 18:41:49 -0800
Subject: [PATCH 030/244] workflows: Re-enable lldb test on Mac OS X

---
 .github/workflows/lldb-tests.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml
index 93fddc2de8c6..68aec6036995 100644
--- a/.github/workflows/lldb-tests.yml
+++ b/.github/workflows/lldb-tests.yml
@@ -20,14 +20,16 @@ jobs:
   build_lldb:
     name: lldb build
     runs-on: ${{ matrix.os }}
+    # Workaround for build failure on Mac OS X: llvm.org/PR46190, https://github.com/actions/virtual-environments/issues/2274
+    env:
+      CPLUS_INCLUDE_PATH: /usr/local/opt/llvm/include/c++/v1:/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/include
     strategy:
       fail-fast: false
       matrix:
         os:
           - ubuntu-latest
           - windows-latest
-          # macOS build disabled due to: llvm.org/PR46190
-          #- macOS-latest
+          - macOS-latest
     steps:
     - name: Setup Windows
       if: startsWith(matrix.os, 'windows')

From 872608926129a61489d484e15cb9186882578c73 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 3 Feb 2021 03:09:24 +0000
Subject: [PATCH 031/244] workflows: Fix actions repository name for llvm tests

---
 .github/workflows/llvm-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
index 1cffc3ef4d97..1fcd67a10078 100644
--- a/.github/workflows/llvm-tests.yml
+++ b/.github/workflows/llvm-tests.yml
@@ -56,7 +56,7 @@ jobs:
 
       - name: Get LLVM version
         id: version
-        uses: tstellar/actions/get-llvm-version@get-version
+        uses: llvm/actions/get-llvm-version@main
 
       - name: Setup Variables
         id: vars

From 2a57ea296a4787828b52799564d7ddf02ec1c4f3 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 1 Feb 2021 13:05:19 +0000
Subject: [PATCH 032/244] workflows: Add job to check for ABI changes in
 libclang.so and libclang-cpp.so

---
 .github/workflows/libclang-abi-tests.yml | 132 +++++++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 .github/workflows/libclang-abi-tests.yml

diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml
new file mode 100644
index 000000000000..5681c7c8166e
--- /dev/null
+++ b/.github/workflows/libclang-abi-tests.yml
@@ -0,0 +1,132 @@
+name: libclang ABI Tests
+
+on:
+  push:
+    branches:
+      - 'release/**'
+    paths:
+      - 'clang/**'
+      - '.github/workflows/libclang-abi-tests.yml'
+  pull_request:
+    paths:
+      - 'clang/**'
+      - '.github/workflows/libclang-abi-tests.yml'
+
+jobs:
+  abi-dump-setup:
+    runs-on: ubuntu-latest
+    outputs:
+      BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }}
+      ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }}
+      ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }}
+      BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }}
+      LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
+      LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }}
+      LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }}
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v1
+        with:
+          fetch-depth: 1
+
+      - name: Get LLVM version
+        id: version
+        uses: llvm/actions/get-llvm-version@main
+
+      - name: Setup Variables
+        id: vars
+        run: |
+          if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then
+            echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))
+            echo ::set-output name=ABI_HEADERS::clang-c
+            echo ::set-output name=ABI_LIBS::libclang.so
+          else
+            echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
+            echo ::set-output name=ABI_HEADERS::.
+            echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so
+          fi
+
+  abi-dump:
+    needs: abi-dump-setup
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        name:
+          - build-baseline
+          - build-latest
+        include:
+          - name: build-baseline
+            llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}
+            ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0
+            repo: llvm/llvm-project
+          - name: build-latest
+            llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }}
+            ref: ${{ github.sha }}
+            repo: ${{ github.repository }}
+    steps:
+    - name: Install Ninja
+      uses: llvm/actions/install-ninja@main
+    - name: Install abi-compliance-checker
+      run: |
+        sudo apt-get install abi-dumper autoconf pkg-config
+    - name: Install universal-ctags
+      run: |
+        git clone https://github.com/universal-ctags/ctags.git
+        cd ctags
+        ./autogen.sh
+        ./configure
+        sudo make install
+    - name: Download source code
+      uses: llvm/actions/get-llvm-project-src@main
+      with:
+        ref: ${{ matrix.ref }}
+        repo: ${{ matrix.repo }}
+    - name: Configure
+      run: |
+        mkdir install
+        cmake -B build -S llvm -G Ninja -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g -Og" -DCMAKE_INSTALL_PREFIX=`pwd`/install llvm
+    - name: Build
+      run: ninja -C build/ ${{ needs.abi-dump-setup.outputs.ABI_LIBS }} install-clang-headers
+    - name: Dump ABI
+      run: |
+        parallel abi-dumper -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o {}-${{ matrix.ref }}.abi ./build/lib/{} ::: ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}
+        for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do
+          # Remove symbol versioning from dumps, so we can compare across major versions.
+          sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' $lib-${{ matrix.ref }}.abi
+          tar -czf $lib-${{ matrix.ref }}.abi.tar.gz $lib-${{ matrix.ref }}.abi
+        done
+    - name: Upload ABI file
+      uses: actions/upload-artifact@v2
+      with:
+        name: ${{ matrix.name }}
+        path: "*${{ matrix.ref }}.abi.tar.gz"
+
+  abi-compare:
+    runs-on: ubuntu-latest
+    needs:
+      - abi-dump-setup
+      - abi-dump
+    steps:
+      - name: Download baseline
+        uses: actions/download-artifact@v1
+        with:
+          name: build-baseline
+      - name: Download latest
+        uses: actions/download-artifact@v1
+        with:
+          name: build-latest
+
+      - name: Install abi-compliance-checker
+        run: sudo apt-get install abi-compliance-checker
+      - name: Compare ABI
+        run: |
+          for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do
+            abi-compliance-checker -lib $lib -old build-baseline/$lib*.abi.tar.gz -new build-latest/$lib*.abi.tar.gz
+          done
+      - name: Upload ABI Comparison
+        if: always()
+        uses: actions/upload-artifact@v2
+        with:
+          name: compat-report-${{ github.sha }}
+          path: compat_reports/
+

From c1899cd5102dbdacd006fdb33db075319ccc933f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 28 Jan 2021 11:21:21 +0000
Subject: [PATCH 033/244] [X86][AVX] Add PR48908 shuffle test case

(cherry picked from commit da8845fc3d3bb0b0e133f020931440511fa72723)
---
 .../X86/vector-shuffle-combining-avx.ll       | 151 ++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 979c365acfd7..3da83b25d363 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -436,6 +436,157 @@ entry:
   unreachable
 }
 
+define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double>* noalias %out0, <4 x double>* noalias %out1, <4 x double>* noalias %out2) {
+; X86-AVX1-LABEL: PR48908:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
+; X86-AVX1-NEXT:    vmovapd %ymm4, (%edx)
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
+; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
+; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX1-NEXT:    vmovapd %ymm3, (%ecx)
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X86-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X86-AVX1-NEXT:    vmovapd %ymm0, (%eax)
+; X86-AVX1-NEXT:    vzeroupper
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: PR48908:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX2-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; X86-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1]
+; X86-AVX2-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
+; X86-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3]
+; X86-AVX2-NEXT:    vmovapd %ymm3, (%edx)
+; X86-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3]
+; X86-AVX2-NEXT:    vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0]
+; X86-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX2-NEXT:    vmovapd %ymm3, (%ecx)
+; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X86-AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X86-AVX2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X86-AVX2-NEXT:    vmovapd %ymm0, (%eax)
+; X86-AVX2-NEXT:    vzeroupper
+; X86-AVX2-NEXT:    retl
+;
+; X86-AVX512-LABEL: PR48908:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
+; X86-AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2]
+; X86-AVX512-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3]
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm5 = [0,0,3,0,8,0,1,0]
+; X86-AVX512-NEXT:    vpermt2pd %zmm2, %zmm5, %zmm3
+; X86-AVX512-NEXT:    vmovapd %ymm3, (%edx)
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = [0,0,3,0,10,0,1,0]
+; X86-AVX512-NEXT:    vpermt2pd %zmm0, %zmm3, %zmm4
+; X86-AVX512-NEXT:    vmovapd %ymm4, (%ecx)
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = <3,0,11,0,u,u,u,u>
+; X86-AVX512-NEXT:    vpermi2pd %zmm1, %zmm0, %zmm3
+; X86-AVX512-NEXT:    vmovapd {{.*#+}} ymm0 = [2,0,8,0,9,0,3,0]
+; X86-AVX512-NEXT:    vpermi2pd %zmm3, %zmm2, %zmm0
+; X86-AVX512-NEXT:    vmovapd %ymm0, (%eax)
+; X86-AVX512-NEXT:    vzeroupper
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-LABEL: PR48908:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
+; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
+; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
+; X64-AVX1-NEXT:    vmovapd %ymm4, (%rdi)
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
+; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
+; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X64-AVX1-NEXT:    vmovapd %ymm3, (%rsi)
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X64-AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X64-AVX1-NEXT:    vmovapd %ymm0, (%rdx)
+; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: PR48908:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X64-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X64-AVX2-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; X64-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm6 = ymm0[0,1],ymm2[0,1]
+; X64-AVX2-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
+; X64-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm6[0],ymm3[1],ymm6[2],ymm3[3]
+; X64-AVX2-NEXT:    vmovapd %ymm3, (%rdi)
+; X64-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm5[0,1],ymm0[2],ymm5[3]
+; X64-AVX2-NEXT:    vpermpd {{.*#+}} ymm4 = ymm4[0,3,2,0]
+; X64-AVX2-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X64-AVX2-NEXT:    vmovapd %ymm3, (%rsi)
+; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX2-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
+; X64-AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
+; X64-AVX2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
+; X64-AVX2-NEXT:    vmovapd %ymm0, (%rdx)
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: PR48908:
+; X64-AVX512:       # %bb.0:
+; X64-AVX512-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
+; X64-AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; X64-AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; X64-AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X64-AVX512-NEXT:    vshufpd {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[2]
+; X64-AVX512-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X64-AVX512-NEXT:    vshufpd {{.*#+}} ymm4 = ymm1[1],ymm4[0],ymm1[2],ymm4[3]
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm5 = [0,3,8,1]
+; X64-AVX512-NEXT:    vpermt2pd %zmm2, %zmm5, %zmm3
+; X64-AVX512-NEXT:    vmovapd %ymm3, (%rdi)
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = [0,3,10,1]
+; X64-AVX512-NEXT:    vpermt2pd %zmm0, %zmm3, %zmm4
+; X64-AVX512-NEXT:    vmovapd %ymm4, (%rsi)
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm3 = <3,11,u,u>
+; X64-AVX512-NEXT:    vpermi2pd %zmm1, %zmm0, %zmm3
+; X64-AVX512-NEXT:    vmovapd {{.*#+}} ymm0 = [2,8,9,3]
+; X64-AVX512-NEXT:    vpermi2pd %zmm3, %zmm2, %zmm0
+; X64-AVX512-NEXT:    vmovapd %ymm0, (%rdx)
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
+  %t0 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  %t1 = shufflevector <4 x double> %v1, <4 x double> %v2, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
+  %r0 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 0, i32 3, i32 6, i32 1>
+  store <4 x double> %r0, <4 x double>* %out0, align 32
+  %r1 = shufflevector <4 x double> %t0, <4 x double> %t1, <4 x i32> <i32 4, i32 7, i32 2, i32 5>
+  store <4 x double> %r1, <4 x double>* %out1, align 32
+  %t2 = shufflevector <4 x double> %v0, <4 x double> %v1, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
+  %r2 = shufflevector <4 x double> %t2, <4 x double> %v2, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+  store <4 x double> %r2, <4 x double>* %out2, align 32
+  ret void
+}
+
 define <4 x i64> @concat_self_v4i64(<2 x i64> %x) {
 ; AVX1-LABEL: concat_self_v4i64:
 ; AVX1:       # %bb.0:

From 52a70a07e93c322ad137bce1a1ff2f1c9fdf6050 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 28 Jan 2021 12:11:31 +0000
Subject: [PATCH 034/244] [X86][AVX] canonicalizeLaneShuffleWithRepeatedOps -
 don't merge VPERMILPD ops with different low/high masks.

Unlike VPERMILPS, VPERMILPD can have non-repeating masks in each 128-bit subvector; we weren't accounting for this when folding vperm2f128(vpermilpd(x,c),vpermilpd(y,c)) -> vpermilpd(vperm2f128(x,y),c).

I'm intending to add support for this but wanted to get a minimal fix in first for merging into 12.xx.

Fixes PR48908

(cherry picked from commit 6663330bc8c84a75ea092272297b557bfc310380)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  9 ++++-
 .../X86/vector-shuffle-combining-avx.ll       | 40 ++++++++++---------
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0dd20235aa3c..6b816c710f98 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36916,11 +36916,18 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
     Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res));
     return DAG.getBitcast(VT, Res);
   }
+  case X86ISD::VPERMILPI:
+    // TODO: Handle v4f64 permutes with different low/high lane masks.
+    if (SrcVT0 == MVT::v4f64) {
+      uint64_t Mask = Src0.getConstantOperandVal(1);
+      if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
+        break;
+    }
+    LLVM_FALLTHROUGH;
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
   case X86ISD::VSRAI:
   case X86ISD::PSHUFD:
-  case X86ISD::VPERMILPI:
     if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
       SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
       SDValue RHS =
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 3da83b25d363..1a1153d0e886 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -442,16 +442,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
-; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
-; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X86-AVX1-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2]
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm5
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3]
 ; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
-; X86-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
-; X86-AVX1-NEXT:    vmovapd %ymm4, (%edx)
-; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
-; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
-; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1]
+; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3]
+; X86-AVX1-NEXT:    vmovapd %ymm3, (%edx)
+; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm4[2,3,0,1]
+; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm4 = ymm4[0,1],ymm0[2],ymm4[3]
+; X86-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3]
 ; X86-AVX1-NEXT:    vmovapd %ymm3, (%ecx)
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; X86-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]
@@ -513,16 +515,18 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x
 ;
 ; X64-AVX1-LABEL: PR48908:
 ; X64-AVX1:       # %bb.0:
-; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm1[2,3],ymm2[0,1]
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm4
-; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm3 = ymm4[1],ymm3[0],ymm4[2],ymm3[3]
-; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[0,1],ymm0[0,1]
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3
+; X64-AVX1-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm3[0,1,2,2]
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1]
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm5
+; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3]
 ; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1]
-; X64-AVX1-NEXT:    vshufpd {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2],ymm4[2]
-; X64-AVX1-NEXT:    vmovapd %ymm4, (%rdi)
-; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm4 = ymm3[2,3,0,1]
-; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2],ymm3[3]
-; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2],ymm4[3]
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1]
+; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3]
+; X64-AVX1-NEXT:    vmovapd %ymm3, (%rdi)
+; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm3 = ymm4[2,3,0,1]
+; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm4 = ymm4[0,1],ymm0[2],ymm4[3]
+; X64-AVX1-NEXT:    vblendpd {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2],ymm3[3]
 ; X64-AVX1-NEXT:    vmovapd %ymm3, (%rsi)
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; X64-AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm2[2,3],ymm1[2,3]

From 0564dd904bf7ef7758cb904ed8f7f2a1f915ef8d Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@plex.tv>
Date: Fri, 29 Jan 2021 08:44:56 +0100
Subject: [PATCH 035/244] [OpenMP] Fix python3 compatibility in openmp's
 lit.cfg

Differential Revision: https://reviews.llvm.org/D95669

(cherry picked from commit c3c02d0d5a313272f6d35926bdf678fc6b884c02)
---
 openmp/runtime/test/lit.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg
index 0d4a6107ff2b..c4e5fe1ea9e0 100644
--- a/openmp/runtime/test/lit.cfg
+++ b/openmp/runtime/test/lit.cfg
@@ -76,7 +76,7 @@ if config.operating_system == 'Darwin':
   cmd = subprocess.Popen(['xcrun', '--show-sdk-path'],
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   out, err = cmd.communicate()
-  out = out.strip()
+  out = out.strip().decode()
   res = cmd.wait()
   if res == 0 and out:
     config.test_flags += " -isysroot " + out

From e3658cefc5bc3538d05fc8ef058d83bcd24b785a Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa" <marukawa@nec.com>
Date: Sat, 30 Jan 2021 12:34:06 +0900
Subject: [PATCH 036/244] [VE] Change integer constants 32-bit friendly

Correct integer constants like `1UL << 63` to `UINT64_C(1) << 63` in
order to make them work on 32-bit machines.  Tested on both i386 and
x86_64 machines.

Reviewed By: mgorny

Differential Revision: https://reviews.llvm.org/D95724

(cherry picked from commit 4648098f97fa2a7c08c04632c70cf29293528812)
---
 llvm/lib/Target/VE/VE.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index a404f7ced70a..8c1fa840f19c 100644
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -334,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) {
     return true;
   }
   // (m)1 patterns
-  return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+  return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val);
 }
 
 inline static bool isMImm32Val(uint32_t Val) {
@@ -347,14 +347,14 @@ inline static bool isMImm32Val(uint32_t Val) {
     return true;
   }
   // (m)1 patterns
-  return (Val & (1 << 31)) && isShiftedMask_32(Val);
+  return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val);
 }
 
 /// val2MImm - Convert an integer immediate value to target MImm immediate.
 inline static uint64_t val2MImm(uint64_t Val) {
   if (Val == 0)
     return 0; // (0)1
-  if (Val & (1UL << 63))
+  if (Val & (UINT64_C(1) << 63))
     return countLeadingOnes(Val);       // (m)1
   return countLeadingZeros(Val) | 0x40; // (m)0
 }
@@ -364,8 +364,8 @@ inline static uint64_t mimm2Val(uint64_t Val) {
   if (Val == 0)
     return 0; // (0)1
   if ((Val & 0x40) == 0)
-    return (uint64_t)((1L << 63) >> (Val & 0x3f)); // (m)1
-  return ((uint64_t)(-1L) >> (Val & 0x3f));        // (m)0
+    return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1
+  return ((uint64_t)INT64_C(-1) >> (Val & 0x3f));          // (m)0
 }
 
 inline unsigned M0(unsigned Val) { return Val + 64; }

From b351efcae08a59c0cafa123a92b24c5f2300202b Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Wed, 27 Jan 2021 23:08:39 -0600
Subject: [PATCH 037/244] [PowerPC] Do not emit XXSPLTI32DX for sub 64-bit
 constants

If the APInt returned by BuildVectorSDNode::isConstantSplat() is narrower than
64 bits, the result produced by XXSPLTI32DX is incorrect. The result returned
by the function appears to be incorrect and we'll investigate/fix it in a
follow-up commit. However, since this causes miscompiles, we must
temporarily disable emitting this instruction for such values.

(cherry picked from commit 54e570d94af995ff58287a8288389641910a8239)
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  3 ++-
 llvm/test/CodeGen/PowerPC/p10-splatImm32.ll | 22 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9215c17cb94b..663ee15db11e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8613,7 +8613,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
       return DAG.getBitcast(Op.getValueType(), SplatNode);
-    } else { // We may lose precision, so we have to use XXSPLTI32DX.
+    } else if (APSplatBits.getBitWidth() == 64) {
+      // We may lose precision, so we have to use XXSPLTI32DX.
 
       uint32_t Hi =
           (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
index 420a96dc1495..081cae729acf 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -100,3 +100,25 @@ define dso_local <8 x i16> @test_xxsplti32dx_9() {
 entry:
   ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef>
 }
+
+define dso_local <16 x i8> @test_xxsplti32dx_10() {
+; CHECK-LABEL: test_xxsplti32dx_10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1207959552
+; CHECK-NEXT:    blr
+entry:
+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 72, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 72>
+}
+
+; FIXME: It appears that there is something wrong with the computation
+;        of the 64-bit constant to splat so we cannot emit xxsplti32dx for
+;        this test case for now.
+define dso_local <16 x i8> @constSplatBug() {
+; CHECK-LABEL: constSplatBug:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    plxv vs34, .LCPI10_0@PCREL(0), 1
+; CHECK-NEXT:    blr
+entry:
+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 71>
+}

From dfb763363bc560769605e37e96c1d13cb236223d Mon Sep 17 00:00:00 2001
From: Albion Fung <conanap@lep82435v.canlab.ibm.com>
Date: Thu, 28 Jan 2021 15:17:18 -0500
Subject: [PATCH 038/244] [PowerPC][Power10] Fix XXSPLTI32DX not correctly
 exploiting specific cases

Some cases may be transformed into 32-bit splats before the value-type check in the condition is evaluated, which may cause incorrect behaviour and provide XXSPLTI32DX with incorrect splat values. The operands of the condition were swapped so that short-circuit evaluation prevents this problem.

Differential Revision: https://reviews.llvm.org/D95634

(cherry picked from commit 2e470e03b49f1d79ebc315ca9d62a690a633c0cd)
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 11 +++++++----
 llvm/test/CodeGen/PowerPC/p10-splatImm32.ll | 16 ++--------------
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 663ee15db11e..929a72ac687e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8604,16 +8604,19 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
-  // double. This is to exploit the XXSPLTIDP instruction.+  // If we lose precision, we use XXSPLTI32DX.
+  // double. This is to exploit the XXSPLTIDP instruction.
+  // If we lose precision, we use XXSPLTI32DX.
   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
       Subtarget.hasPrefixInstrs()) {
-    if (convertToNonDenormSingle(APSplatBits) &&
-        (Op->getValueType(0) == MVT::v2f64)) {
+    // Check the type first to short-circuit so we don't modify APSplatBits if
+    // this block isn't executed.
+    if ((Op->getValueType(0) == MVT::v2f64) &&
+        convertToNonDenormSingle(APSplatBits)) {
       SDValue SplatNode = DAG.getNode(
           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
       return DAG.getBitcast(Op.getValueType(), SplatNode);
-    } else if (APSplatBits.getBitWidth() == 64) {
+    } else {
       // We may lose precision, so we have to use XXSPLTI32DX.
 
       uint32_t Hi =
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
index 081cae729acf..ce4c2da24b0d 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -101,23 +101,11 @@ entry:
   ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef>
 }
 
-define dso_local <16 x i8> @test_xxsplti32dx_10() {
-; CHECK-LABEL: test_xxsplti32dx_10:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor vs34, vs34, vs34
-; CHECK-NEXT:    xxsplti32dx vs34, 0, 1207959552
-; CHECK-NEXT:    blr
-entry:
-  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 72, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 72>
-}
-
-; FIXME: It appears that there is something wrong with the computation
-;        of the 64-bit constant to splat so we cannot emit xxsplti32dx for
-;        this test case for now.
 define dso_local <16 x i8> @constSplatBug() {
 ; CHECK-LABEL: constSplatBug:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI10_0@PCREL(0), 1
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1191182336
 ; CHECK-NEXT:    blr
 entry:
   ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 71>

From 237b39a02f38b4903f39fef362d0f5e98e1de194 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <kai.wang@sifive.com>
Date: Fri, 29 Jan 2021 21:59:49 +0800
Subject: [PATCH 039/244] [RISCV] Update the version number to v0.10 for
 vector.

v0.10 is tagged in V specification. Update the version to v0.10.

Differential Revision: https://reviews.llvm.org/D95680

(cherry picked from commit 282aca10aeb03bdaef0a8d4f3faa4c2ff236e527)
---
 clang/lib/Basic/Targets/RISCV.cpp              |  6 +++---
 clang/lib/Driver/ToolChains/Arch/RISCV.cpp     |  2 +-
 clang/test/Driver/riscv-arch.c                 |  6 +++---
 .../test/Preprocessor/riscv-target-features.c  | 18 +++++++++---------
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp  |  6 +++---
 .../RISCV/MCTargetDesc/RISCVTargetStreamer.cpp |  6 +++---
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td       |  2 +-
 llvm/test/CodeGen/RISCV/attributes.ll          |  8 ++++----
 llvm/test/MC/RISCV/attribute-arch.s            |  8 ++++----
 9 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 0bf02e605740..786201ea340d 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -150,7 +150,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
   }
 
   if (HasV) {
-    Builder.defineMacro("__riscv_v", "1000000");
+    Builder.defineMacro("__riscv_v", "10000");
     Builder.defineMacro("__riscv_vector");
   }
 
@@ -191,10 +191,10 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__riscv_zfh", "1000");
 
   if (HasZvamo)
-    Builder.defineMacro("__riscv_zvamo", "1000000");
+    Builder.defineMacro("__riscv_zvamo", "10000");
 
   if (HasZvlsseg)
-    Builder.defineMacro("__riscv_zvlsseg", "1000000");
+    Builder.defineMacro("__riscv_zvlsseg", "10000");
 }
 
 /// Return true if has this feature, need to sync with handleTargetFeatures.
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index ffae47e5672e..c7f2a3ea5e02 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -63,7 +63,7 @@ isExperimentalExtension(StringRef Ext) {
       Ext == "zbr" || Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc")
     return RISCVExtensionVersion{"0", "93"};
   if (Ext == "v" || Ext == "zvamo" || Ext == "zvlsseg")
-    return RISCVExtensionVersion{"1", "0"};
+    return RISCVExtensionVersion{"0", "10"};
   if (Ext == "zfh")
     return RISCVExtensionVersion{"0", "1"};
   return None;
diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index 3762a4aef1b3..cf148ca885d0 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -384,7 +384,7 @@
 // RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1'
 // RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10 -menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s
 // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v"
 
@@ -412,7 +412,7 @@
 // RV32-EXPERIMENTAL-ZVAMO-BADVERS: error: invalid arch name 'rv32izvamo0p1'
 // RV32-EXPERIMENTAL-ZVAMO-BADVERS: unsupported version number 0.1 for experimental extension
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo1p0 -menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo0p10 -menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVAMO-GOODVERS %s
 // RV32-EXPERIMENTAL-ZVAMO-GOODVERS: "-target-feature" "+experimental-zvamo"
 
@@ -431,6 +431,6 @@
 // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: error: invalid arch name 'rv32izvlsseg0p1'
 // RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: unsupported version number 0.1 for experimental extension
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg1p0 -menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32izvlsseg0p10 -menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS %s
 // RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS: "-target-feature" "+experimental-zvlsseg"
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index 006395505246..88826bbd60b8 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -110,23 +110,23 @@
 // CHECK-DOUBLE-NOT: __riscv_float_abi_single
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN:   -march=rv32iv1p0 -x c -E -dM %s \
+// RUN:   -march=rv32iv0p10 -x c -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-V-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
-// RUN:   -march=rv64iv1p0 -x c -E -dM %s \
+// RUN:   -march=rv64iv0p10 -x c -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo1p0 -x c -E -dM %s \
+// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo0p10 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo1p0 -x c -E -dM %s \
+// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvamo0p10 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg1p0 -x c -E -dM %s \
+// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg0p10 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg1p0 -x c -E -dM %s \
+// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions -march=rv32izvlsseg0p10 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// CHECK-V-EXT: __riscv_v 1000000
+// CHECK-V-EXT: __riscv_v 10000
 // CHECK-V-EXT: __riscv_vector 1
-// CHECK-V-EXT: __riscv_zvamo 1000000
-// CHECK-V-EXT: __riscv_zvlsseg 1000000
+// CHECK-V-EXT: __riscv_zvamo 10000
+// CHECK-V-EXT: __riscv_zvlsseg 10000
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions -march=rv32izba0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBA-EXT %s
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index e7e590153605..dcf7525d7458 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2126,7 +2126,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
       if (getFeatureBits(RISCV::FeatureStdExtB))
         formalArchStr = (Twine(formalArchStr) + "_b0p93").str();
       if (getFeatureBits(RISCV::FeatureStdExtV))
-        formalArchStr = (Twine(formalArchStr) + "_v1p0").str();
+        formalArchStr = (Twine(formalArchStr) + "_v0p10").str();
       if (getFeatureBits(RISCV::FeatureExtZfh))
         formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str();
       if (getFeatureBits(RISCV::FeatureExtZba))
@@ -2152,9 +2152,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
       if (getFeatureBits(RISCV::FeatureExtZbt))
         formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str();
       if (getFeatureBits(RISCV::FeatureExtZvamo))
-        formalArchStr = (Twine(formalArchStr) + "_zvamo1p0").str();
+        formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str();
       if (getFeatureBits(RISCV::FeatureStdExtZvlsseg))
-        formalArchStr = (Twine(formalArchStr) + "_zvlsseg1p0").str();
+        formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str();
 
       getTargetStreamer().emitTextAttribute(Tag, formalArchStr);
     }
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 72434a15bedb..13c4b84aa300 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -63,7 +63,7 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
   if (STI.hasFeature(RISCV::FeatureStdExtB))
     Arch += "_b0p93";
   if (STI.hasFeature(RISCV::FeatureStdExtV))
-    Arch += "_v1p0";
+    Arch += "_v0p10";
   if (STI.hasFeature(RISCV::FeatureExtZfh))
     Arch += "_zfh0p1";
   if (STI.hasFeature(RISCV::FeatureExtZba))
@@ -89,9 +89,9 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
   if (STI.hasFeature(RISCV::FeatureExtZbt))
     Arch += "_zbt0p93";
   if (STI.hasFeature(RISCV::FeatureExtZvamo))
-    Arch += "_zvamo1p0";
+    Arch += "_zvamo0p10";
   if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg))
-    Arch += "_zvlsseg1p0";
+    Arch += "_zvlsseg0p10";
 
   emitTextAttribute(RISCVAttrs::ARCH, Arch);
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 4f9e9cfbdb98..e02c9f8bcbe2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// This file describes the RISC-V instructions from the standard 'V' Vector
-/// extension, version 0.9.
+/// extension, version 0.10.
 /// This version is still experimental as the 'V' extension hasn't been
 /// ratified yet.
 ///
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index a0943d5d4293..c26a6d5b4a69 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -47,7 +47,7 @@
 ; RV32D: .attribute 5, "rv32i2p0_f2p0_d2p0"
 ; RV32C: .attribute 5, "rv32i2p0_c2p0"
 ; RV32B: .attribute 5, "rv32i2p0_b0p93_zba0p93_zbb0p93_zbc0p93_zbe0p93_zbf0p93_zbm0p93_zbp0p93_zbr0p93_zbs0p93_zbt0p93"
-; RV32V: .attribute 5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0"
+; RV32V: .attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10"
 ; RV32ZFH: .attribute 5, "rv32i2p0_f2p0_zfh0p1"
 ; RV32ZBA: .attribute 5, "rv32i2p0_zba0p93"
 ; RV32ZBB: .attribute 5, "rv32i2p0_zbb0p93"
@@ -60,7 +60,7 @@
 ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93"
 ; RV32ZBS: .attribute 5, "rv32i2p0_zbs0p93"
 ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93"
-; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v1p0_zfh0p1_zbb0p93_zvamo1p0_zvlsseg1p0"
+; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v0p10_zfh0p1_zbb0p93_zvamo0p10_zvlsseg0p10"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64A: .attribute 5, "rv64i2p0_a2p0"
@@ -80,8 +80,8 @@
 ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93"
 ; RV64ZBS: .attribute 5, "rv64i2p0_zbs0p93"
 ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93"
-; RV64V: .attribute 5, "rv64i2p0_v1p0_zvamo1p0_zvlsseg1p0"
-; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v1p0_zfh0p1_zbb0p93_zvamo1p0_zvlsseg1p0"
+; RV64V: .attribute 5, "rv64i2p0_v0p10_zvamo0p10_zvlsseg0p10"
+; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v0p10_zfh0p1_zbb0p93_zvamo0p10_zvlsseg0p10"
 
 
 define i32 @addi(i32 %a) {
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 66d7ad576382..51d0c6ace9e1 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -40,7 +40,7 @@
 # CHECK: attribute      5, "rv32i2p0_b0p93_zba0p93_zbb0p93_zbc0p93_zbe0p93_zbf0p93_zbm0p93_zbp0p93_zbr0p93_zbs0p93_zbt0p93"
 
 .attribute arch, "rv32iv"
-# CHECK: attribute      5, "rv32i2p0_v1p0"
+# CHECK: attribute      5, "rv32i2p0_v0p10"
 
 .attribute arch, "rv32izba"
 # CHECK: attribute      5, "rv32i2p0_zba0p93"
@@ -79,7 +79,7 @@
 # CHECK: attribute      5, "rv32i2p0_f2p0_zfh0p1"
 
 .attribute arch, "rv32ivzvamo_zvlsseg"
-# CHECK: attribute      5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0"
+# CHECK: attribute      5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10"
 
-.attribute arch, "rv32iv_zvamo1p0_zvlsseg"
-# CHECK: attribute      5, "rv32i2p0_v1p0_zvamo1p0_zvlsseg1p0"
+.attribute arch, "rv32iv_zvamo0p10_zvlsseg"
+# CHECK: attribute      5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10"

From c738c8aa9bf387cc960feca81bc5263e8c634e15 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <kai.wang@sifive.com>
Date: Sat, 30 Jan 2021 07:54:41 +0800
Subject: [PATCH 040/244] [RISCV]  Update the version number to v0.10 for
 vector.

(cherry picked from commit 9847023660467a4469b5667bcf7a4c73a4780037)
---
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td    | 2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 06e4d053d5d7..9fdfc2727d86 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// This file contains the required infrastructure to support code generation
-/// for the standard 'V' (Vector) extension, version 0.9.  This version is still
+/// for the standard 'V' (Vector) extension, version 0.10.  This version is still
 /// experimental as the 'V' extension hasn't been ratified yet.
 ///
 /// This file is included from RISCVInstrInfoV.td
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index aea3d0e17ccc..79a1e6ddc8a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -8,7 +8,7 @@
 ///
 /// This file contains the required infrastructure and SDNode patterns to
 /// support code generation for the standard 'V' (Vector) extension, version
-/// 0.9.  This version is still experimental as the 'V' extension hasn't been
+/// 0.10.  This version is still experimental as the 'V' extension hasn't been
 /// ratified yet.
 ///
 /// This file is included from and depends upon RISCVInstrInfoVPseudos.td

From c5904f5c9d32e563e2898e1242d5818e488fe2ee Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Sat, 16 Jan 2021 16:08:40 +0000
Subject: [PATCH 041/244] [LV] Fix crash when computing max VF too early

D90687 introduced a crash:

  llvm::LoopVectorizationCostModel::computeMaxVF(llvm::ElementCount, unsigned int):
    Assertion `WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() &&
    "No decisions should have been taken at this point"' failed.

when compiling the following C code:

  typedef struct {
  char a;
  } b;

  b *c;
  int d, e;

  int f() {
    int g = 0;
    for (; d; d++) {
      e = 0;
      for (; e < c[d].a; e++)
        g++;
    }
    return g;
  }

with:

  clang -Os -target hexagon -mhvx -fvectorize -mv67 testcase.c -S -o -

This occurred since prior to D90687 computeFeasibleMaxVF would only be
called in computeMaxVF when a scalar epilogue was allowed, but now it's
always called. This causes the assert above since computeFeasibleMaxVF
collects all viable VFs larger than the default MaxVF, and for each VF
calculates the register usage which results in analysis being done the
assert above guards against. This can occur in computeFeasibleMaxVF if
TTI.shouldMaximizeVectorBandwidth returns true, and this target hook is
implemented in the Hexagon backend to always return true.

Reported by @iajbar.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D94869

(cherry picked from commit 8cda227432f1c9ceb63b88802ed8136da97274f1)
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  7 ++---
 .../LoopVectorize/Hexagon/maximum-vf-crash.ll | 29 +++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea0d7673edf6..47635dbdda02 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5504,11 +5504,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     return None;
   }
 
-  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
-
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
-    return MaxVF;
+    return computeFeasibleMaxVF(TC, UserVF);
   case CM_ScalarEpilogueNotAllowedUsePredicate:
     LLVM_FALLTHROUGH;
   case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5546,7 +5544,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
                            "scalar epilogue instead.\n");
       ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-      return MaxVF;
+      return computeFeasibleMaxVF(TC, UserVF);
     }
     return None;
   }
@@ -5563,6 +5561,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
+  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
   assert(!MaxVF.isScalable() &&
          "Scalable vectors do not yet support tail folding");
   assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
diff --git a/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
new file mode 100644
index 000000000000..5f8c5d329edf
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
@@ -0,0 +1,29 @@
+; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Check that we don't crash.
+
+; CHECK-LABEL: @f
+; CHECK: vector.body
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+; Function Attrs: optsize
+define i32 @f() #0 {
+entry:
+  br label %loop
+
+loop:
+  %g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %0 = load i8, i8* undef, align 1
+  %g.1.lcssa = add i32 %g.016, undef
+  %iv.next = add nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, 0
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret i32 %g.1.lcssa
+}
+
+attributes #0 = { optsize "target-features"="+hvx-length128b" }

From b15f3fc5c71dc8a9db7e931e2922a065293e4a64 Mon Sep 17 00:00:00 2001
From: Andrew Ng <andrew.ng@sony.com>
Date: Wed, 27 Jan 2021 16:47:21 +0000
Subject: [PATCH 042/244] [X86] Fix disassembly of x86-64 GDTLS code sequence

For x86-64 the REX.w prefix takes precedence over any other size
override (i.e. 0x66). Therefore, for x86-64 when REX.w is present set
'hasOpSize' to false to ensure that any size override is ignored.

Fixes PR48901.

Differential Revision: https://reviews.llvm.org/D95682

(cherry picked from commit 94fedd266125a5425aa33e11332bf414f0b6dc35)
---
 .../X86/Disassembler/X86Disassembler.cpp      |  1 +
 llvm/test/MC/Disassembler/X86/x86-64.txt      | 10 +++++++---
 .../llvm-objdump/X86/disassemble-gdtls.s      | 19 +++++++++++++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s

diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 05e482a6b66e..4e6d8e8e1a54 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
       insn->addressSize = (insn->hasAdSize ? 4 : 8);
       insn->displacementSize = 4;
       insn->immediateSize = 4;
+      insn->hasOpSize = false;
     } else {
       insn->registerSize = (insn->hasOpSize ? 2 : 4);
       insn->addressSize = (insn->hasAdSize ? 4 : 8);
diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt
index d91ef2500d99..5e56d4c796e6 100644
--- a/llvm/test/MC/Disassembler/X86/x86-64.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -329,8 +329,10 @@
 # CHECK: callw 32767
 0x66 0xe8 0xff 0x7f
 
-# CHECK: callw 32767
-0x66 0x66 0x48 0xe8 0xff 0x7f
+# TODO: Should display data16 prefixes.
+# CHECK-NOT: data16
+# CHECK: callq 32767
+0x66 0x66 0x48 0xe8 0xff 0x7f 0x00 0x00
 
 # CHECK: jmp -32769
 0xe9 0xff 0x7f 0xff 0xff
@@ -338,8 +340,10 @@
 # CHECK: jmp 32767
 0x66 0xe9 0xff 0x7f
 
+# TODO: Should display data16 prefixes.
+# CHECK-NOT: data16
 # CHECK: jmp 32767
-0x66 0x66 0x48 0xe9 0xff 0x7f
+0x66 0x66 0x48 0xe9 0xff 0x7f 0x00 0x00
 
 # CHECK: jo -32769
 0x0f 0x80 0xff 0x7f 0xff 0xff
diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s
new file mode 100644
index 000000000000..e913f5f6a345
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/disassemble-gdtls.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc %s -filetype=obj -triple=x86_64 | llvm-objdump -d - | FileCheck %s
+
+# CHECK:      <PR48901>:
+# TODO: Should display data16 prefixes.
+# CHECK-NEXT: 0: 66 48 8d 3d 00 00 00 00       leaq    (%rip), %rdi  # 8 <PR48901+0x8>
+# CHECK-NEXT: 8: 66 66 48 e8 00 00 00 00       callq   0x10 <PR48901+0x10>
+# CHECK-EMPTY:
+
+PR48901:
+ data16
+ leaq   bar@TLSGD(%rip),%rdi
+ data16
+ data16
+ rex64
+ callq  __tls_get_addr@PLT
+
+.section .tdata,"awT",@progbits
+bar:
+.long 42

From e2d822c3bdf6388c6ef21f35745105aba064d16d Mon Sep 17 00:00:00 2001
From: Haowei Wu <haowei@google.com>
Date: Thu, 28 Jan 2021 14:13:20 -0800
Subject: [PATCH 043/244] [elfabi] Fix tests which failed on different
 timezones

This patch fixes elfabi tests on machines using GMT+X timezone
settings.

Differential Revision: https://reviews.llvm.org/D95641

(cherry picked from commit 771b35965457ebd5faaed8a1c3d2bcefffe721a3)
---
 llvm/test/tools/llvm-elfabi/preserve-dates-stub.test | 4 ++--
 llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test
index c399029e0337..9742a61aa281 100644
--- a/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test
+++ b/llvm/test/tools/llvm-elfabi/preserve-dates-stub.test
@@ -1,9 +1,9 @@
 ## Test writing unchanged content to ELF Stub file with --write-if-changed flag.
 
 # RUN: llvm-elfabi %s --output-target=elf64-little %t
-# RUN: touch -m -t 197001010000 %t
+# RUN: env TZ=GMT touch -m -t 197001010000 %t
 # RUN: llvm-elfabi %s --output-target=elf64-little %t --write-if-changed
-# RUN: ls -l %t | FileCheck %s
+# RUN: env TZ=GMT ls -l %t | FileCheck %s
 
 --- !tapi-tbe
 TbeVersion: 1.0
diff --git a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test
index 89cad7733eee..3ec190067c73 100644
--- a/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test
+++ b/llvm/test/tools/llvm-elfabi/preserve-dates-tbe.test
@@ -1,8 +1,8 @@
 ## Test writing unchanged content to TBE file with --write-if-changed flag.
 
 # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t
-# RUN: touch -m -t 197001010000 %t
+# RUN: env TZ=GMT touch -m -t 197001010000 %t
 # RUN: llvm-elfabi --elf %p/Inputs/gnu_hash.so --emit-tbe=%t --write-if-changed
-# RUN: ls -l %t | FileCheck %s
+# RUN: env TZ=GMT ls -l %t | FileCheck %s
 
 # CHECK: {{[[:space:]]1970}}

From 12b6579b79dc21e9e54e74520ece0d571a640d4b Mon Sep 17 00:00:00 2001
From: Atmn Patel <atmndp@gmail.com>
Date: Wed, 27 Jan 2021 18:49:41 -0500
Subject: [PATCH 044/244] [OpenMP][Libomptarget] Fix conditional in CMake for
 remote plugin

The remote offloading plugin's CMakeLists was trying to build if its
flag was enabled even if it didn't find gRPC/protobuf. The conditional
was wrong; this patch fixes it.

Differential Revision: https://reviews.llvm.org/D95574

(cherry picked from commit 8a77056256d9970387595a5c729d894e3fe07131)
---
 openmp/libomptarget/plugins/remote/CMakeLists.txt | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/openmp/libomptarget/plugins/remote/CMakeLists.txt b/openmp/libomptarget/plugins/remote/CMakeLists.txt
index 1baa1125f44c..989c74642c66 100644
--- a/openmp/libomptarget/plugins/remote/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/remote/CMakeLists.txt
@@ -42,12 +42,13 @@ if (Protobuf_FOUND AND gRPC_FOUND AND PROTOC AND GRPC_CPP_PLUGIN)
   set(GRPC_INCLUDE_DIR
       ${directory}
   )
+
+  set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/)
+  set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/)
+  
+  add_subdirectory(src)
+  add_subdirectory(server)
 else()
   libomptarget_say("Not building remote offloading plugin: required libraries were not found.")
 endif()
 
-set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/)
-set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/)
-
-add_subdirectory(src)
-add_subdirectory(server)

From 4d0874c72a0a3f53eb3084a1ea3ee4456ab6e004 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 28 Jan 2021 08:13:28 -0500
Subject: [PATCH 045/244] [OpenMP][NVPTX] Added the missing -O1 when building
 NVPTX bitcode libraries

In the past `-O1` was used when building NVPTX bitcode libraries. After
we switched to OpenMP, `-O1` was missing by mistake, leading to a huge performance
regression.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95545

(cherry picked from commit 5a64794bbad4010778406dfee7748e6080258dbf)
---
 .../libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 23efbba29d66..eeda137ef120 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -126,14 +126,14 @@ set(cuda_src_files
 )
 
 # Set flags for LLVM Bitcode compilation.
-set(bc_flags -S -x c++
-              -target nvptx64
-              -Xclang -emit-llvm-bc
-              -Xclang -aux-triple -Xclang ${aux_triple}
-              -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
-              -D__CUDACC__
-              -I${devicertl_base_directory}
-              -I${devicertl_nvptx_directory}/src)
+set(bc_flags -S -x c++ -O1 -std=c++14
+             -target nvptx64
+             -Xclang -emit-llvm-bc
+             -Xclang -aux-triple -Xclang ${aux_triple}
+             -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
+             -D__CUDACC__
+             -I${devicertl_base_directory}
+             -I${devicertl_nvptx_directory}/src)
 
 if(${LIBOMPTARGET_NVPTX_DEBUG})
   list(APPEND bc_flags -DOMPTARGET_NVPTX_DEBUG=-1)

From 5d926bb3c46848c704833e0f02884395609388a3 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 28 Jan 2021 08:12:39 -0500
Subject: [PATCH 046/244] [OpenMP][deviceRTLs] Added
 `[[clang::loader_uninitialized]]` explicitly

`[[clang::loader_uninitialized]]` is in macro `SHARED` but it doesn't
work for arrays like `parallelLevel`, so the variable will be zero initialized.
There is also a similar issue for `omptarget_nvptx_device_State` which is in
the global address space. Its c'tor is also generated, which was not the case in
the past when building the `deviceRTLs` with CUDA. In this patch, we added the
attribute to the two variables explicitly.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95550

(cherry picked from commit 19248d30e4ed5250fa84abbbd52fc7b835918a45)
---
 openmp/libomptarget/deviceRTLs/common/src/omp_data.cu | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
index b91afd7476fe..4736d07108e0 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu
@@ -25,7 +25,8 @@ DEVICE omptarget_device_environmentTy omptarget_device_environment;
 // global data holding OpenMP state information
 ////////////////////////////////////////////////////////////////////////////////
 
-DEVICE
+// OpenMP will try to call its ctor if we don't add the attribute explicitly
+[[clang::loader_uninitialized]] DEVICE
     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
         omptarget_nvptx_device_State[MAX_SM];
 
@@ -33,7 +34,9 @@ DEVICE omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager;
 DEVICE uint32_t SHARED(usedMemIdx);
 DEVICE uint32_t SHARED(usedSlotIdx);
 
-DEVICE uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+// SHARED doesn't work with arrays so we add the attribute explicitly.
+[[clang::loader_uninitialized]] DEVICE uint8_t
+    parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
 #pragma omp allocate(parallelLevel) allocator(omp_pteam_mem_alloc)
 DEVICE uint16_t SHARED(threadLimit);
 DEVICE uint16_t SHARED(threadsInTeam);

From 255f7398845a7cfb47aef53e40b68057ec56839e Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Fri, 29 Jan 2021 13:12:47 -0500
Subject: [PATCH 047/244] [OpenMP][NFC] Added release note for new `deviceRTLs`
 and hidden helper task

Added release note for new `deviceRTLs` and hidden helper task for LLVM
12.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95584

(cherry picked from commit 7bc31018f71cac22b7060c49cefb6f3d0d2e2069)
---
 openmp/docs/ReleaseNotes.rst | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/openmp/docs/ReleaseNotes.rst b/openmp/docs/ReleaseNotes.rst
index 7f40d3c81510..cb3464ad84f0 100644
--- a/openmp/docs/ReleaseNotes.rst
+++ b/openmp/docs/ReleaseNotes.rst
@@ -7,7 +7,7 @@ OpenMP 12.0.0 Release Notes
    These are in-progress notes for the upcoming LLVM 12.0.0 release.
    Release notes for previous releases can be found on
    `the Download Page <https://releases.llvm.org/download.html>`_.
-   
+
 
 Introduction
 ============
@@ -44,3 +44,27 @@ Non-comprehensive list of changes in this release
   ``LIBOMPTARGET_INFO`` allows the user to request certain information from the
   ``libomptarget`` runtime using a 32-bit field. A full description of each
   environment variable is described :ref:`here <libopenmptarget_environment_vars>`.
+
+- ``target nowait`` is now supported via hidden helper task, which is a task not
+  bound to any parallel region. A hidden helper team with a number of threads is
+  created when the first hidden helper task is encountered. The number of threads
+  can be configured via the environment variable
+  ``LIBOMP_NUM_HIDDEN_HELPER_THREADS``. By default it is 8. If
+  ``LIBOMP_NUM_HIDDEN_HELPER_THREADS=0``, hidden helper task is disabled and
+  falls back to a regular OpenMP task. It can also be disabled by setting the
+  environment variable ``LIBOMP_USE_HIDDEN_HELPER_TASK=OFF``.
+
+- ``deviceRTLs`` for NVPTX platform is CUDA free now. It is generally OpenMP code.
+  Target dependent parts are implemented with Clang/LLVM/NVVM intrinsics. CUDA
+  SDK is also dropped as a dependency to build the device runtime, which means
+  device runtime can also be built on a CUDA free system. However, it is
+  disabled by default. Set the CMake variable
+  ``LIBOMPTARGET_BUILD_NVPTX_BCLIB=ON`` to enable the build of NVPTX device
+  runtime on a CUDA free system. ``gcc-multilib`` and ``g++-multilib`` are
+  required. If CUDA is found, the device runtime will be built by default.
+
+- Static NVPTX device runtime library (``libomptarget-nvptx.a``) was dropped.
+  A bitcode library is required to build an OpenMP program. If the library is
+  not found in the default path or any of the paths defined by ``LIBRARY_PATH``,
+  an error will be raised. User can also specify the path to the bitcode device
+  library via ``--libomptarget-nvptx-bc-path=``.

From 922e4149d16754b54ce225faa3e769d32937d7ad Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Mon, 1 Feb 2021 10:31:09 -0500
Subject: [PATCH 048/244] [OpenMP] Fix seg fault in libomptarget when using
 Info with multiple threads

Summary:
One option for the LIBOMPTARGET_INFO environment variable is to print the current status of the device's data mappings. These are a shared resource among threads so this needs to be protected when using multiple streams.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95786

(cherry picked from commit fda48539988d2a1bdb6395799151e9090312a20b)
---
 openmp/libomptarget/src/interface.cpp | 4 ++--
 openmp/libomptarget/src/private.h     | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index cf6d36960c75..01f3715d6bcc 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -58,7 +58,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) {
   case tgt_mandatory:
     if (!success) {
       if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
-        for (const auto &Device : PM->Devices)
+        for (auto &Device : PM->Devices)
           dumpTargetPointerMappings(loc, Device);
       else
         FAILURE_MESSAGE("Run with LIBOMPTARGET_DEBUG=%d to dump host-target "
@@ -76,7 +76,7 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) {
           1, "failure of target construct while offloading is mandatory");
     } else {
       if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
-        for (const auto &Device : PM->Devices)
+        for (auto &Device : PM->Devices)
           dumpTargetPointerMappings(loc, Device);
     }
     break;
diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index fb6f681d3020..3b0e57dfe15e 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -99,7 +99,7 @@ int __kmpc_get_target_offload(void) __attribute__((weak));
 ////////////////////////////////////////////////////////////////////////////////
 /// dump a table of all the host-target pointer pairs on failure
 static inline void dumpTargetPointerMappings(const ident_t *Loc,
-                                             const DeviceTy &Device) {
+                                             DeviceTy &Device) {
   if (Device.HostDataToTargetMap.empty())
     return;
 
@@ -109,6 +109,7 @@ static inline void dumpTargetPointerMappings(const ident_t *Loc,
        Kernel.getFilename(), Kernel.getLine(), Kernel.getColumn());
   INFO(OMP_INFOTYPE_ALL, Device.DeviceID, "%-18s %-18s %s %s %s\n", "Host Ptr",
        "Target Ptr", "Size (B)", "RefCount", "Declaration");
+  Device.DataMapMtx.lock();
   for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
     SourceInfo Info(HostTargetMap.HstPtrName);
     INFO(OMP_INFOTYPE_ALL, Device.DeviceID,
@@ -118,6 +119,7 @@ static inline void dumpTargetPointerMappings(const ident_t *Loc,
          HostTargetMap.getRefCount(), Info.getName(), Info.getFilename(),
          Info.getLine(), Info.getColumn());
   }
+  Device.DataMapMtx.unlock();
 }
 
 ////////////////////////////////////////////////////////////////////////////////

From 678c259d277135ef32861887a8ac8618deba5f24 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Wed, 3 Feb 2021 14:57:19 -0800
Subject: [PATCH 049/244] PR44325 (and duplicates): don't issue
 -Wzero-as-null-pointer-constant when rewriting 'a < b' as '(a <=> b) < 0'.

It's pretty common for comparison category types to use a pointer or
pointer-to-member type as their '0' parameter.

(cherry picked from commit 1f06f41993b6363e6b2c4f22a13488a3e687f31b)
---
 clang/lib/Sema/Sema.cpp                       |  7 +++++++
 .../SemaCXX/cxx2a-three-way-comparison.cpp    | 20 ++++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 55cb3aee6194..cb5a84a31235 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -537,6 +537,13 @@ void Sema::diagnoseZeroToNullptrConversion(CastKind Kind, const Expr* E) {
   if (E->IgnoreParenImpCasts()->getType()->isNullPtrType())
     return;
 
+  // Don't diagnose the conversion from a 0 literal to a null pointer argument
+  // in a synthesized call to operator<=>.
+  if (!CodeSynthesisContexts.empty() &&
+      CodeSynthesisContexts.back().Kind ==
+          CodeSynthesisContext::RewritingOperatorAsSpaceship)
+    return;
+
   // If it is a macro from system header, and if the macro name is not "NULL",
   // do not warn.
   SourceLocation MaybeMacroLoc = E->getBeginLoc();
diff --git a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp
index 353360e052bb..b94225274fff 100644
--- a/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp
+++ b/clang/test/SemaCXX/cxx2a-three-way-comparison.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++2a -verify %s
+// RUN: %clang_cc1 -std=c++2a -verify %s -Wzero-as-null-pointer-constant
 
 // Keep this test before any declarations of operator<=>.
 namespace PR44786 {
@@ -40,3 +40,21 @@ namespace PR47893 {
   int &f(...);
   int &r = f(A(), A());
 }
+
+namespace PR44325 {
+  struct cmp_cat {};
+  bool operator<(cmp_cat, void*);
+  bool operator>(cmp_cat, int cmp_cat::*);
+
+  struct X {};
+  cmp_cat operator<=>(X, X);
+
+  bool b1 = X() < X(); // no warning
+  bool b2 = X() > X(); // no warning
+
+  // FIXME: It's not clear whether warning here is useful, but we can't really
+  // tell that this is a comparison category in general. This is probably OK,
+  // as comparisons against zero are only really intended for use in the
+  // implicit rewrite rules, not for explicit use by programs.
+  bool c = cmp_cat() < 0; // expected-warning {{zero as null pointer constant}}
+}

From 2a917b70e770e2d25d96f91beebf2a3e52bb9e66 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Wed, 3 Feb 2021 23:04:12 +0000
Subject: [PATCH 050/244] Extend release notes for AST Matchers changes

---
 clang/docs/ReleaseNotes.rst | 38 +++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a34cd512ca59..9efd4c01f053 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -250,15 +250,41 @@ release of Clang. Users of the build system should adjust accordingly.
 AST Matchers
 ------------
 
-- The behavior of TK_IgnoreUnlessSpelledInSource with the traverse() matcher
-  has been changed to no longer match on template instantiations or on
+- The ``mapAnyOf()`` matcher was added. This allows convenient matching of
+  different AST nodes which have a compatible matcher API. For example,
+  ``mapAnyOf(ifStmt, forStmt).with(hasCondition(integerLiteral()))``
+  matches any ``IfStmt`` or ``ForStmt`` with an integer literal as the
+  condition.
+
+- The ``binaryOperation()`` matcher allows matching expressions which
+  appear like binary operators in the code, even if they are really
+  ``CXXOperatorCallExpr`` for example. It is based on the ``mapAnyOf()``
+  matcher functionality. The matcher API for the latter node has been
+  extended with ``hasLHS()`` etc to facilitate the abstraction.
+
+- Matcher API for ``CXXRewrittenBinaryOperator`` has been added. In addition
+  to explicit matching with the ``cxxRewrittenBinaryOperator()`` matcher, the
+  ``binaryOperation()`` matches on nodes of this type.
+
+- The behavior of ``TK_IgnoreUnlessSpelledInSource`` with the ``traverse()``
+  matcher has been changed to no longer match on template instantiations or on
   implicit nodes which are not spelled in the source.
 
-- The TK_IgnoreImplicitCastsAndParentheses traversal kind was removed. It
-  is recommended to use TK_IgnoreUnlessSpelledInSource instead.
+- The ``TK_IgnoreImplicitCastsAndParentheses`` traversal kind was removed. It
+  is recommended to use ``TK_IgnoreUnlessSpelledInSource`` instead.
 
-- The behavior of the forEach() matcher was changed to not internally ignore
-  implicit and parenthesis nodes.
+- The behavior of the ``forEach()`` matcher was changed to not internally
+  ignore implicit and parenthesis nodes.  This makes it consistent with
+  the ``has()`` matcher.  Uses of ``forEach()`` relying on the old behavior
+  can now use the ``traverse()`` matcher or ``ignoringParenCasts()``.
+
+- Several AST Matchers have been changed to match based on the active
+  traversal mode.  For example, ``argumentCountIs()`` matches the number of
+  arguments written in the source, ignoring default arguments represented
+  by ``CXXDefaultArgExpr`` nodes.
+
+- Improvements in AST Matchers allow more matching of template declarations,
+  independent of their template instantiations.
 
 clang-format
 ------------

From f5602e0bf31ab590da19fa357980a753dbfd666e Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 28 Jan 2021 07:24:19 -0500
Subject: [PATCH 051/244] [OpenMP] Disabled profiling in `libomp` by default to
 unblock link errors

Link error occurred when time profiling in libomp is enabled by default
because `libomp` is assumed to be a C library but the dependence on
`libLLVMSupport` for profiling is a C++ library. Currently the issue blocks all
OpenMP tests in Phabricator.

This patch adds a new CMake option `OPENMP_ENABLE_LIBOMP_PROFILING` to
enable/disable the feature. By default it is disabled. Note that once time
profiling is enabled for `libomp`, it becomes a C++ library.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95585

(cherry picked from commit c571b168349fdf22d1dc8b920bcffa3d5161f0a2)
---
 openmp/CMakeLists.txt                 |  6 ++++++
 openmp/docs/design/Runtimes.rst       |  5 ++++-
 openmp/runtime/CMakeLists.txt         |  6 +++---
 openmp/runtime/src/CMakeLists.txt     | 12 +++++++++++-
 openmp/runtime/src/kmp_config.h.cmake |  4 ++--
 openmp/runtime/src/kmp_runtime.cpp    |  6 +++---
 6 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index 67600bebdafb..4787d4b5a321 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -86,6 +86,12 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading."
        ${ENABLE_LIBOMPTARGET})
 option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget."
        ${ENABLE_LIBOMPTARGET})
+option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF)
+
+# Build host runtime library, after LIBOMPTARGET variables are set since they are needed
+# to enable time profiling support in the OpenMP runtime.
+add_subdirectory(runtime)
+
 if (OPENMP_ENABLE_LIBOMPTARGET)
   # Check that the library can actually be built.
   if (APPLE OR WIN32)
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 016b88ba324b..ad36e43eccdc 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -48,7 +48,10 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on
 `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_
 for time trace output. Using this library is enabled by default when building
 using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will
-be saved to the filename specified by the environment variable.
+be saved to the filename specified by the environment variable. For multi-threaded
+applications, profiling in ``libomp`` is also needed. Set the CMake option
+``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will
+turn ``libomp`` into a C++ library.
 
 .. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool
 
diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 9fdd04f41646..8828ff8ef455 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -34,7 +34,6 @@ if(${OPENMP_STANDALONE_BUILD})
   # Should assertions be enabled?  They are on by default.
   set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
     "enable assertions?")
-  set(LIBOMPTARGET_PROFILING_SUPPORT FALSE)
 else() # Part of LLVM build
   # Determine the native architecture from LLVM.
   string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH)
@@ -66,10 +65,11 @@ else() # Part of LLVM build
     libomp_get_architecture(LIBOMP_ARCH)
   endif ()
   set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS})
-  # Time profiling support
-  set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING})
 endif()
 
+# Time profiling support
+set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING})
+
 # FUJITSU A64FX is a special processor because its cache line size is 256.
 # We need to pass this information into kmp_config.h.
 if(LIBOMP_ARCH STREQUAL "aarch64")
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index 2e927df84f5c..822f9ca2b825 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -50,6 +50,14 @@ if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
 endif()
 
+# Building with time profiling support requires LLVM directory includes.
+if(LIBOMP_PROFILING_SUPPORT)
+  include_directories(
+    ${LLVM_MAIN_INCLUDE_DIR}
+    ${LLVM_INCLUDE_DIR}
+  )
+endif()
+
 # Getting correct source files to build library
 set(LIBOMP_CXXFILES)
 set(LIBOMP_ASMFILES)
@@ -135,7 +143,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS)
 
 libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS)
 # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled.
-if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING))
+if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING))
   add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES})
   # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS
   target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS})
@@ -144,6 +152,8 @@ else()
     LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}
     LINK_COMPONENTS Support
     )
+  # libomp must be a C++ library such that it can link libLLVMSupport
+  set(LIBOMP_LINKER_LANGUAGE CXX)
 endif()
 
 set_target_properties(omp PROPERTIES
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index 3d682c690fc7..f6aee7197ee8 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -44,8 +44,8 @@
 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG
 #cmakedefine01 LIBOMP_OMPT_SUPPORT
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
-#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT
-#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT
+#cmakedefine01 LIBOMP_PROFILING_SUPPORT
+#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT
 #cmakedefine01 LIBOMP_OMPT_OPTIONAL
 #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
 #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 4a0634d59cff..a6e32bd008e1 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -32,7 +32,7 @@
 #include "ompt-specific.h"
 #endif
 
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
 #include "llvm/Support/TimeProfiler.h"
 static char *ProfileTraceFile = nullptr;
 #endif
@@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
 /* ------------------------------------------------------------------------ */
 
 void *__kmp_launch_thread(kmp_info_t *this_thr) {
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
   ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
   // TODO: add a configuration option for time granularity
   if (ProfileTraceFile)
@@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
   KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
   KMP_MB();
 
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
   llvm::timeTraceProfilerFinishThread();
 #endif
   return this_thr;

From 7d096f9bb350429628c6befce8f94dba4bbc6db9 Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Wed, 27 Jan 2021 16:04:11 -0800
Subject: [PATCH 052/244] [CSSPGO] Support of CS profiles in extended binary
 format.

This change brings up support of context-sensitive profiles in the format of extended binary. Existing sample profile reader/writer/merger code is being tweaked to reflect the fact of bracketed input contexts, like (`[...]`). The paired brackets are also needed in extbinary profiles because we don't yet have an otherwise good way to tell calling contexts apart from regular function names since the context delimiter `@` can somehow serve as a part of the C++ mangled names.

Reviewed By: wmi, wenlei

Differential Revision: https://reviews.llvm.org/D95547

(cherry picked from commit 7e99bddfeaab2713a8bb6ca538da25b66e6efc59)
---
 llvm/include/llvm/ProfileData/SampleProf.h    | 19 ++--
 .../llvm/ProfileData/SampleProfReader.h       |  4 +
 llvm/lib/ProfileData/SampleProfReader.cpp     | 86 ++++++++++---------
 llvm/lib/ProfileData/SampleProfWriter.cpp     |  4 +-
 .../Transforms/IPO/SampleContextTracker.cpp   |  2 +-
 .../SampleProfile/profile-context-tracker.ll  |  4 +
 .../llvm-profdata/Inputs/cs-sample.proftext   | 36 ++++++++
 .../llvm-profdata/cs-sample-profile.test      |  4 +
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |  2 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  |  2 +-
 10 files changed, 113 insertions(+), 50 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext
 create mode 100644 llvm/test/tools/llvm-profdata/cs-sample-profile.test

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index c45ace9e68c1..346bc4c81d86 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -439,9 +439,11 @@ class SampleContext {
   void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
   bool hasContext() const { return State != UnknownContext; }
   bool isBaseContext() const { return CallingContext.empty(); }
-  StringRef getName() const { return Name; }
+  StringRef getNameWithoutContext() const { return Name; }
   StringRef getCallingContext() const { return CallingContext; }
-  StringRef getNameWithContext() const { return FullContext; }
+  StringRef getNameWithContext(bool WithBracket = false) const {
+    return WithBracket ? InputContext : FullContext;
+  }
 
 private:
   // Give a context string, decode and populate internal states like
@@ -449,6 +451,7 @@ class SampleContext {
   // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
   void setContext(StringRef ContextStr, ContextStateMask CState) {
     assert(!ContextStr.empty());
+    InputContext = ContextStr;
     // Note that `[]` wrapped input indicates a full context string, otherwise
     // it's treated as context-less function name only.
     bool HasContext = ContextStr.startswith("[");
@@ -480,6 +483,9 @@ class SampleContext {
     }
   }
 
+  // Input context string including bracketed calling context and leaf function
+  // name
+  StringRef InputContext;
   // Full context string including calling context and leaf function name
   StringRef FullContext;
   // Function name for the associated sample profile
@@ -676,7 +682,8 @@ class FunctionSamples {
     Name = Other.getName();
     if (!GUIDToFuncNameMap)
       GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
-
+    if (Context.getNameWithContext(true).empty())
+      Context = Other.getContext();
     if (FunctionHash == 0) {
       // Set the function hash code for the target profile.
       FunctionHash = Other.getFunctionHash();
@@ -743,8 +750,10 @@ class FunctionSamples {
   StringRef getName() const { return Name; }
 
   /// Return function name with context.
-  StringRef getNameWithContext() const {
-    return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
+  StringRef getNameWithContext(bool WithBracket = false) const {
+    return FunctionSamples::ProfileIsCS
+               ? Context.getNameWithContext(WithBracket)
+               : Name;
   }
 
   /// Return the original function name.
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 3f52a2f6163b..999e75eddffa 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -488,8 +488,12 @@ class SampleProfileReader {
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
+  /// Whether function profiles are context-sensitive.
   bool ProfileIsCS = false;
 
+  /// Number of context-sensitive profiles.
+  uint32_t CSProfileCount = 0;
+
   /// \brief The format of sample.
   SampleProfileFormat Format = SPF_None;
 };
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index c42931174bc0..c9f41687c356 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -222,8 +222,6 @@ std::error_code SampleProfileReaderText::readImpl() {
   sampleprof_error Result = sampleprof_error::success;
 
   InlineCallStack InlineStack;
-  int CSProfileCount = 0;
-  int RegularProfileCount = 0;
   uint32_t ProbeProfileCount = 0;
 
   // SeenMetadata tracks whether we have processed metadata for the current
@@ -257,11 +255,9 @@ std::error_code SampleProfileReaderText::readImpl() {
       SampleContext FContext(FName);
       if (FContext.hasContext())
         ++CSProfileCount;
-      else
-        ++RegularProfileCount;
       Profiles[FContext] = FunctionSamples();
       FunctionSamples &FProfile = Profiles[FContext];
-      FProfile.setName(FContext.getName());
+      FProfile.setName(FContext.getNameWithoutContext());
       FProfile.setContext(FContext);
       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
@@ -324,13 +320,14 @@ std::error_code SampleProfileReaderText::readImpl() {
     }
   }
 
-  assert((RegularProfileCount == 0 || CSProfileCount == 0) &&
+  assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
   ProfileIsCS = (CSProfileCount > 0);
   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
          "Cannot have both probe-based profiles and regular profiles");
   ProfileIsProbeBased = (ProbeProfileCount > 0);
   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
+  FunctionSamples::ProfileIsCS = ProfileIsCS;
 
   if (Result == sampleprof_error::success)
     computeSummary();
@@ -546,12 +543,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
   if (std::error_code EC = FName.getError())
     return EC;
 
-  Profiles[*FName] = FunctionSamples();
-  FunctionSamples &FProfile = Profiles[*FName];
-  FProfile.setName(*FName);
-
+  SampleContext FContext(*FName);
+  Profiles[FContext] = FunctionSamples();
+  FunctionSamples &FProfile = Profiles[FContext];
+  FProfile.setName(FContext.getNameWithoutContext());
+  FProfile.setContext(FContext);
   FProfile.addHeadSamples(*NumHeadSamples);
 
+  if (FContext.hasContext())
+    CSProfileCount++;
+
   if (std::error_code EC = readProfile(FProfile))
     return EC;
   return sampleprof_error::success;
@@ -654,40 +655,44 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
         return EC;
     }
     assert(Data == End && "More data is read than expected");
-    return sampleprof_error::success;
-  }
-
-  if (Remapper) {
-    for (auto Name : FuncsToUse) {
-      Remapper->insert(Name);
+  } else {
+    if (Remapper) {
+      for (auto Name : FuncsToUse) {
+        Remapper->insert(Name);
+      }
     }
-  }
 
-  if (useMD5()) {
-    for (auto Name : FuncsToUse) {
-      auto GUID = std::to_string(MD5Hash(Name));
-      auto iter = FuncOffsetTable.find(StringRef(GUID));
-      if (iter == FuncOffsetTable.end())
-        continue;
-      const uint8_t *FuncProfileAddr = Start + iter->second;
-      assert(FuncProfileAddr < End && "out of LBRProfile section");
-      if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-        return EC;
-    }
-  } else {
-    for (auto NameOffset : FuncOffsetTable) {
-      auto FuncName = NameOffset.first;
-      if (!FuncsToUse.count(FuncName) &&
-          (!Remapper || !Remapper->exist(FuncName)))
-        continue;
-      const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-      assert(FuncProfileAddr < End && "out of LBRProfile section");
-      if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-        return EC;
+    if (useMD5()) {
+      for (auto Name : FuncsToUse) {
+        auto GUID = std::to_string(MD5Hash(Name));
+        auto iter = FuncOffsetTable.find(StringRef(GUID));
+        if (iter == FuncOffsetTable.end())
+          continue;
+        const uint8_t *FuncProfileAddr = Start + iter->second;
+        assert(FuncProfileAddr < End && "out of LBRProfile section");
+        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+          return EC;
+      }
+    } else {
+      for (auto NameOffset : FuncOffsetTable) {
+        SampleContext FContext(NameOffset.first);
+        auto FuncName = FContext.getNameWithoutContext();
+        if (!FuncsToUse.count(FuncName) &&
+            (!Remapper || !Remapper->exist(FuncName)))
+          continue;
+        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+        assert(FuncProfileAddr < End && "out of LBRProfile section");
+        if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+          return EC;
+      }
     }
+    Data = End;
   }
 
-  Data = End;
+  assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
+         "Cannot have both context-sensitive and regular profile");
+  ProfileIsCS = (CSProfileCount > 0);
+  FunctionSamples::ProfileIsCS = ProfileIsCS;
   return sampleprof_error::success;
 }
 
@@ -887,7 +892,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
     if (std::error_code EC = Checksum.getError())
       return EC;
 
-    Profiles[*FName].setFunctionHash(*Checksum);
+    SampleContext FContext(*FName);
+    Profiles[FContext].setFunctionHash(*Checksum);
   }
   return sampleprof_error::success;
 }
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 71dba6281f76..d3bc05e06fdf 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -147,7 +147,7 @@ std::error_code SampleProfileWriterExtBinaryBase::write(
 std::error_code
 SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
   uint64_t Offset = OutputStream->tell();
-  StringRef Name = S.getName();
+  StringRef Name = S.getNameWithContext(true);
   FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
   encodeULEB128(S.getHeadSamples(), *OutputStream);
   return writeBody(S);
@@ -635,7 +635,7 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
 std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
   auto &OS = *OutputStream;
 
-  if (std::error_code EC = writeNameIdx(S.getName()))
+  if (std::error_code EC = writeNameIdx(S.getNameWithContext(true)))
     return EC;
 
   encodeULEB128(S.getTotalSamples(), OS);
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 37fc27e91100..660d79de667c 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -179,7 +179,7 @@ SampleContextTracker::SampleContextTracker(
     SampleContext Context(FuncSample.first(), RawContext);
     LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
     if (!Context.isBaseContext())
-      FuncToCtxtProfileSet[Context.getName()].insert(FSamples);
+      FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples);
     ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
     assert(!NewNode->getFunctionSamples() &&
            "New node can't have sample profile");
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
index ed32c2a0027b..adda7022047d 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
@@ -1,18 +1,22 @@
 ; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly
 ; based on inline decision, so post inline counts are accurate.
 
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t
+
 ; Note that we need new pass manager to enable top-down processing for sample profile loader
 ; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile
 ;   main:3 @ _Z5funcAi
 ;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
 ;   _Z5funcBi:1 @ _Z8funcLeafi
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
 
 ; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile
 ;   main:3 @ _Z5funcAi
 ;   _Z5funcAi:1 @ _Z8funcLeafi
 ;   _Z5funcBi:1 @ _Z8funcLeafi
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT
 
 
 @factor = dso_local global i32 3, align 4, !dbg !0
diff --git a/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext
new file mode 100644
index 000000000000..eead4d4d62f0
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/cs-sample.proftext
@@ -0,0 +1,36 @@
+[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11
+ 0: 6
+ 1: 6
+ 3: 287884
+ 4: 287864 _Z3fibi:315608
+ 15: 23
+[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
+ 0: 15
+ 1: 15
+ 3: 74946
+ 4: 74941 _Z3fibi:82359
+ 10: 23324
+ 11: 23327 _Z3fibi:25228
+ 15: 11
+[main]:154:0
+ 2: 12
+ 3: 18 _Z5funcAi:11
+ 3.1: 18 _Z5funcBi:19
+[external:12 @ main]:154:12
+ 2: 12
+ 3: 10 _Z5funcAi:7
+ 3.1: 10 _Z5funcBi:11
+[main:3.1 @ _Z5funcBi]:120:19
+ 0: 19
+ 1: 19 _Z8funcLeafi:20
+ 3: 12
+[externalA:17 @ _Z5funcBi]:120:3
+ 0: 3
+ 1: 3
+[external:10 @ _Z5funcBi]:120:10
+ 0: 10
+ 1: 10
+[main:3 @ _Z5funcAi]:99:11
+ 0: 10
+ 1: 10 _Z8funcLeafi:11
+ 3: 24
diff --git a/llvm/test/tools/llvm-profdata/cs-sample-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-profile.test
new file mode 100644
index 000000000000..04c573ddece3
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/cs-sample-profile.test
@@ -0,0 +1,4 @@
+RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample.proftext
+RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext
+RUN: llvm-profdata merge --sample --extbinary %p/Inputs/cs-sample.proftext -o %t.prof && llvm-profdata merge --sample --text %t.prof -o %t1.proftext
+RUN: diff -b %t1.proftext %S/Inputs/cs-sample.proftext
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 8dc43924c067..7e53c30c7579 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -696,7 +696,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
           Remapper ? remapSamples(I->second, *Remapper, Result)
                    : FunctionSamples();
       FunctionSamples &Samples = Remapper ? Remapped : I->second;
-      StringRef FName = Samples.getName();
+      StringRef FName = Samples.getNameWithContext(true);
       MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
       if (Result != sampleprof_error::success) {
         std::error_code EC = make_error_code(Result);
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 265beccb84a8..7624fd3f2808 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -164,7 +164,7 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr) {
   if (Ret.second) {
     SampleContext FContext(Ret.first->first(), RawContext);
     FunctionSamples &FProfile = Ret.first->second;
-    FProfile.setName(FContext.getName());
+    FProfile.setName(FContext.getNameWithoutContext());
     FProfile.setContext(FContext);
   }
   return Ret.first->second;

From f2cabaac9525ba4b86301136e21ec9aad6aaf326 Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Sun, 31 Jan 2021 22:31:51 -0800
Subject: [PATCH 053/244] [CSSPGO] Tweaking inlining with pseudo probes.

Fixing up a couple places where `getCallSiteIdentifier` is needed to support pseudo-probe-based callsites.

Also fixing an issue in the extbinary profile reader where the metadata section was not fully scanned, because the scan was bounded by the number of profiles loaded for the current module only.

Reviewed By: wmi, wenlei

Differential Revision: https://reviews.llvm.org/D95791

(cherry picked from commit 224fee8219bb3aed34f13ce40935e1b3ede90a0f)
---
 llvm/lib/ProfileData/SampleProfReader.cpp     |   9 +-
 .../Transforms/IPO/SampleContextTracker.cpp   |  11 +-
 .../Inputs/pseudo-probe-inline.prof           |  18 ++
 .../SampleProfile/pseudo-probe-inline.ll      | 175 ++++++++++++++++++
 4 files changed, 204 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll

diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index c9f41687c356..370ffc8e2885 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -883,7 +883,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
   if (!ProfileIsProbeBased)
     return sampleprof_error::success;
-  for (unsigned I = 0; I < Profiles.size(); ++I) {
+  while (Data < End) {
     auto FName(readStringFromTable());
     if (std::error_code EC = FName.getError())
       return EC;
@@ -893,8 +893,13 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
       return EC;
 
     SampleContext FContext(*FName);
-    Profiles[FContext].setFunctionHash(*Checksum);
+    // No need to load metadata for profiles that are not loaded in the current
+    // module.
+    if (Profiles.count(FContext))
+      Profiles[FContext].setFunctionHash(*Checksum);
   }
+
+  assert(Data == End && "More data is read than expected");
   return sampleprof_error::success;
 }
 
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 660d79de667c..fad72985dedd 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -308,8 +308,7 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
     return;
 
   // Get the context that needs to be promoted
-  LineLocation CallSite(FunctionSamples::getOffset(DIL),
-                        DIL->getBaseDiscriminator());
+  LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
   ContextTrieNode *NodeToPromo =
       CallerNode->getChildContext(CallSite, CalleeName);
   if (!NodeToPromo)
@@ -370,9 +369,7 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
     return nullptr;
 
   return CallContext->getChildContext(
-      LineLocation(FunctionSamples::getOffset(DIL),
-                   DIL->getBaseDiscriminator()),
-      CalleeName);
+      FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
 }
 
 ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
@@ -386,8 +383,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
     if (Name.empty())
       Name = PrevDIL->getScope()->getSubprogram()->getName();
     S.push_back(
-        std::make_pair(LineLocation(FunctionSamples::getOffset(DIL),
-                                    DIL->getBaseDiscriminator()), Name));
+        std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL),
+                       PrevDIL->getScope()->getSubprogram()->getLinkageName()));
     PrevDIL = DIL;
   }
 
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof
new file mode 100644
index 000000000000..fd3ff773e85d
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-inline.prof
@@ -0,0 +1,18 @@
+[foo]:23:23
+ 1: 23
+ 2: 23 zen:23
+ !CFGChecksum: 281479271677951
+[foo:2 @ zen]:765858:23
+ 1: 23
+ 2: 382920
+ 3: 382915
+ !CFGChecksum: 138828622701
+[bar]:23:23
+ 1: 23
+ 2: 23 zen:23
+ !CFGChecksum: 281479271677951
+[bar:2 @ zen]:765858:23
+ 1: 23
+ 2: 382920
+ 3: 382915
+ !CFGChecksum: 138828622701
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
new file mode 100644
index 000000000000..a5033a0dc190
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -0,0 +1,175 @@
+; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s
+; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml
+
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-inline.prof -o %t2
+; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s
+; RUN: FileCheck %s -check-prefix=YAML < %t2.opt.yaml
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@factor = dso_local global i32 3, align 4
+
+define dso_local i32 @foo(i32 %x) #0 !dbg !12 {
+entry:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0)
+  %add = add nsw i32 %x, 100000, !dbg !19
+;; Check zen is fully inlined so there's no call to zen anymore.
+;; Check code from the inlining of zen is properly annotated here.
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]]
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]]
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK-NOT: call i32 @zen
+  %call = call i32 @zen(i32 %add), !dbg !20
+  ret i32 %call, !dbg !21
+}
+
+; CHECK: define dso_local i32 @zen
+define dso_local i32 @zen(i32 %x) #0 !dbg !22 {
+entry:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0)
+  %cmp = icmp sgt i32 %x, 0, !dbg !26
+  br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28
+
+while.cond:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+  %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ]
+  %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29
+  br i1 %cmp1, label %while.body, label %if.end, !dbg !31
+
+while.body:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
+  %0 = load volatile i32, i32* @factor, align 4, !dbg !32
+  %sub = sub nsw i32 %x.addr.0, %0, !dbg !39
+  br label %while.cond, !dbg !31
+
+while.cond2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
+  %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ]
+  %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42
+  br i1 %cmp3, label %while.body4, label %if.end, !dbg !44
+
+while.body4:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
+  %1 = load volatile i32, i32* @factor, align 4, !dbg !45
+  %add = add nsw i32 %x.addr.1, %1, !dbg !48
+  br label %while.cond2, !dbg !44
+
+if.end:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+  %x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ]
+  ret i32 %x.addr.2, !dbg !51
+}
+
+; CHECK: !llvm.pseudo_probe_desc = !{![[#DESC0:]], ![[#DESC1:]]}
+; CHECK: ![[#DESC0]] = !{i64 [[#GUID1]], i64 [[#HASH1:]], !"foo"}
+; CHECK: ![[#DESC1]] = !{i64 [[#GUID2]], i64 [[#HASH2:]], !"zen"}
+; CHECK: ![[PD1]] = !{!"branch_weights", i32 25, i32 1}
+; CHECK: ![[PD2]] = !{!"branch_weights", i32 382916, i32 25}
+
+; Checking to see if YAML file is generated and contains remarks
+;YAML: --- !Passed
+;YAML-NEXT:  Pass:            sample-profile-inline
+;YAML-NEXT:  Name:            Inlined
+;YAML-NEXT:  DebugLoc:        { File: test.cpp, Line: 10, Column: 11 }
+;YAML-NEXT:  Function:        foo
+;YAML-NEXT:  Args:
+;YAML-NEXT:    - Callee:          zen
+;YAML-NEXT:      DebugLoc:        { File: test.cpp, Line: 38, Column: 0 }
+;YAML-NEXT:    - String:          ' inlined into '
+;YAML-NEXT:    - Caller:          foo
+;YAML-NEXT:      DebugLoc:        { File: test.cpp, Line: 9, Column: 0 }
+;YAML-NEXT:    - String:          ' to match profiling context'
+;YAML-NEXT:    - String:          ' with '
+;YAML-NEXT:    - String:          '(cost='
+;YAML-NEXT:    - Cost:            '15'
+;YAML-NEXT:    - String:          ', threshold='
+;YAML-NEXT:    - Threshold:       '225'
+;YAML-NEXT:    - String:          ')'
+;YAML-NEXT:    - String:          ' at callsite '
+;YAML-NEXT:    - String:          foo
+;YAML-NEXT:    - String:          ':'
+;YAML-NEXT:    - Line:            '1'
+;YAML-NEXT:    - String:          ':'
+;YAML-NEXT:    - Column:          '11'
+;YAML-NEXT:    - String:          ';'
+;YAML-NEXT:  ...
+;YAML:  --- !Analysis
+;YAML-NEXT:  Pass:            sample-profile
+;YAML-NEXT:  Name:            AppliedSamples
+;YAML-NEXT:  DebugLoc:        { File: test.cpp, Line: 10, Column: 22 }
+;YAML-NEXT:  Function:        foo
+;YAML-NEXT:  Args:
+;YAML-NEXT:    - String:          'Applied '
+;YAML-NEXT:    - NumSamples:      '23'
+;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
+;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ')'
+;YAML-NEXT:  ...
+;YAML:  --- !Analysis
+;YAML-NEXT:  Pass:            sample-profile
+;YAML-NEXT:  Name:            AppliedSamples
+;YAML-NEXT:  DebugLoc:        { File: test.cpp, Line: 39, Column: 9 }
+;YAML-NEXT:  Function:        foo
+;YAML-NEXT:  Args:
+;YAML-NEXT:    - String:          'Applied '
+;YAML-NEXT:    - NumSamples:      '23'
+;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
+;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ')'
+;YAML-NEXT:  ...
+;YAML:  --- !Analysis
+;YAML-NEXT:  Pass:            sample-profile
+;YAML-NEXT:  Name:            AppliedSamples
+;YAML-NEXT:  DebugLoc:        { File: test.cpp, Line: 41, Column: 14 }
+;YAML-NEXT:  Function:        foo
+;YAML-NEXT:  Args:
+;YAML-NEXT:    - String:          'Applied '
+;YAML-NEXT:    - NumSamples:      '382920'
+;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
+;YAML-NEXT:    - ProbeId:         '2'
+;YAML-NEXT:    - String:          ')'
+;YAML-NEXT:  ...
+
+attributes #0 = {"use-sample-profile"}
+
+!llvm.module.flags = !{!8, !9}
+
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3)
+!3 = !DIFile(filename: "test.cpp", directory: "test")
+!4 = !{}
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !{i32 7, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 9, type: !13, scopeLine: 9, unit: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!7, !7}
+!18 = !DILocation(line: 0, scope: !12)
+!19 = !DILocation(line: 10, column: 22, scope: !12)
+!20 = !DILocation(line: 10, column: 11, scope: !12)
+!21 = !DILocation(line: 12, column: 3, scope: !12)
+!22 = distinct !DISubprogram(name: "zen", scope: !3, file: !3, line: 37, type: !13, scopeLine: 38, unit: !2)
+!25 = !DILocation(line: 0, scope: !22)
+!26 = !DILocation(line: 39, column: 9, scope: !27)
+!27 = distinct !DILexicalBlock(scope: !22, file: !3, line: 39, column: 7)
+!28 = !DILocation(line: 39, column: 7, scope: !22)
+!29 = !DILocation(line: 41, column: 14, scope: !30)
+!30 = distinct !DILexicalBlock(scope: !27, file: !3, line: 39, column: 14)
+!31 = !DILocation(line: 41, column: 5, scope: !30)
+!32 = !DILocation(line: 42, column: 16, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !30, file: !3, line: 41, column: 19)
+!38 = !DILocation(line: 42, column: 12, scope: !33)
+!39 = !DILocation(line: 42, column: 9, scope: !33)
+!42 = !DILocation(line: 48, column: 14, scope: !43)
+!43 = distinct !DILexicalBlock(scope: !27, file: !3, line: 46, column: 8)
+!44 = !DILocation(line: 48, column: 5, scope: !43)
+!45 = !DILocation(line: 49, column: 16, scope: !46)
+!46 = distinct !DILexicalBlock(scope: !43, file: !3, line: 48, column: 19)
+!47 = !DILocation(line: 49, column: 12, scope: !46)
+!48 = !DILocation(line: 49, column: 9, scope: !46)
+!51 = !DILocation(line: 53, column: 3, scope: !22)

From b9fa16f2234edddf6e0f449a0e7b646ee9046cf3 Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Fri, 22 Jan 2021 15:52:46 -0800
Subject: [PATCH 054/244] [CSSPGO] Passing the clang driver switch
 -fpseudo-probe-for-profiling to the linker.

As titled.

Reviewed By: wmi, wenlei

Differential Revision: https://reviews.llvm.org/D95271

(cherry picked from commit d3e2e3740d0730cb6788c771bb01a8f3e935bf2e)
---
 clang/include/clang/Driver/Options.td      |  2 +-
 clang/lib/Driver/ToolChains/CommonArgs.cpp |  5 +++++
 clang/test/Driver/pseudo-probe-lto.c       | 10 ++++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Driver/pseudo-probe-lto.c

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 42c5319041d0..1f6c13d5cc96 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1147,7 +1147,7 @@ def fprofile_update_EQ : Joined<["-"], "fprofile-update=">,
 defm pseudo_probe_for_profiling : BoolFOption<"pseudo-probe-for-profiling",
   CodeGenOpts<"PseudoProbeForProfiling">, DefaultFalse,
   PosFlag<SetTrue, [], "Emit">, NegFlag<SetFalse, [], "Do not emit">,
-  BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiler">>;
+  BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiling">>;
 def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">,
     Group<f_Group>, Flags<[CC1Option, CoreOption]>,
     HelpText<"Generate instrumented code to collect order file into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6a95aa5ec628..bcaea71dca94 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -605,6 +605,11 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
       CmdArgs.push_back("-plugin-opt=new-pass-manager");
   }
 
+  // Pass an option to enable pseudo probe emission.
+  if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
+                   options::OPT_fno_pseudo_probe_for_profiling, false))
+    CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling");
+
   // Setup statistics file output.
   SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
   if (!StatsFile.empty())
diff --git a/clang/test/Driver/pseudo-probe-lto.c b/clang/test/Driver/pseudo-probe-lto.c
new file mode 100644
index 000000000000..e319b8c0098b
--- /dev/null
+++ b/clang/test/Driver/pseudo-probe-lto.c
@@ -0,0 +1,10 @@
+// RUN: touch %t.o
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto=thin -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fno-pseudo-probe-for-profiling -fpseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=PROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto 2>&1 | FileCheck %s --check-prefix=NOPROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fno-pseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=NOPROBE
+// RUN: %clang -### %t.o -target x86_64-unknown-linux -flto -fpseudo-probe-for-profiling -fno-pseudo-probe-for-profiling 2>&1 | FileCheck %s --check-prefix=NOPROBE
+
+// PROBE: -plugin-opt=pseudo-probe-for-profiling
+// NOPROBE-NOT: -plugin-opt=pseudo-probe-for-profiling

From 27ff658e97528540e4425c0cb6400f3e5355f53a Mon Sep 17 00:00:00 2001
From: Wenlei He <aktoon@gmail.com>
Date: Sun, 3 Jan 2021 16:43:06 -0800
Subject: [PATCH 055/244] [CSSPGO] Call site prioritized inlining for sample
 PGO

This change implemented call site prioritized BFS profile guided inlining for the sample profile loader. The new inlining strategy maximizes the benefit of context-sensitive profile as mentioned in the follow-up discussion of the CSSPGO RFC. The change will not affect today's AutoFDO as it's opt-in. CSSPGO now defaults to the new FDO inliner, but can fall back to today's replay inliner using a switch (`-sample-profile-prioritized-inline=0`).

Motivation

With baseline AutoFDO, the inliner in sample profile loader only replays previous inlining, and the use of profile is only for pruning previous inlining that turned out to be cold. Due to the nature of replay, the FDO inliner is simple with hotness being the only decision factor. It has the following limitations that we're improving now for CSSPGO.
 - It doesn't take inline candidate size into account. Since it's doing replay, the size growth is bounded by previous CGSCC inlining. With context-sensitive profile, FDO inliner is no longer limited by previous inlining, so we need to take size into account to avoid significant size bloat.
 - The way it looks at hotness is not accurate. It uses total samples in an inlinee as proxy for hotness, while what really matters for an inline decision is the call site count. This is an unfortunate fall back because call site count and callee entry count are not reliable due to dwarf based correlation, especially for inlinees. Now paired with pseudo-probe, we have accurate call site count and callee's entry count, so we can use that to gauge hotness more accurately.
 - It treats all call sites from a block as hot as long as there's one call site considered hot. This is normally true, but since total samples is used as hotness proxy, this transitiveness within a block magnifies the inaccurate hotness heuristic. With pseudo-probe and the change above, this is no longer an issue for CSSPGO.

New FDO Inliner

Putting all the requirements for CSSPGO together, we need a top-down call site prioritized BFS inliner. Here are the reasons why each component is needed.
 - Top-down: We need a top-down inliner to better leverage context-sensitive profile, so inlining is driven by accurate context profile, and post-inline is also accurate. This is already implemented in https://reviews.llvm.org/D70655.
 - Size Cap: For top-down inliner, taking function size into account for inline decision alone isn't sufficient to control size growth. We also need to explicitly cap size growth because with top-down inlining, we can grow inliner size significantly with a large number of smaller inlinees even if each individually passes the cost/size check.
 - Prioritize call sites: With size cap, inlining order also becomes important, because if we stop inlining due to size budget limit, we'd want to use budget towards the most beneficial call sites.
 - BFS inline: Same as call site prioritization, if we stop inlining due to size budget limit, we want a balanced inline tree, rather than going deep on one call path.

Note that the new inliner avoids repeatedly evaluating the same set of call sites, so it should help with compile time too. For this reason, we could transition today's FDO inliner to use a queue with equal priority to avoid wasted reevaluation of the same call site (TODO).

Speculative indirect call promotion and inlining is also supported now with CSSPGO just like baseline AutoFDO.

Tunings and knobs

I created tuning knobs for size growth/cap control, and for hot threshold separate from CGSCC inliner. The default values are selected based on initial tuning with CSSPGO.

Results

Evaluated with an internal LLVM fork a couple of months ago, plus another change to adjust the hot-threshold cutoff for context profile (will send up after this one), the new inliner shows a ~1% geomean perf win on spec2006 with CSSPGO, while reducing code size too. The measurement was done using a train-train setup, MonoLTO w/ new pass manager and pseudo-probe. Note that this is just a starting point - we hope that the new inliner will open up more opportunity with CSSPGO, but it will certainly take more time and effort to make it fully calibrated and ready for bigger workloads (we're working on it).

Differential Revision: https://reviews.llvm.org/D94001

(cherry picked from commit 6bae5973c476e16dbbc82030d65c7859a6628e89)
---
 .../Transforms/IPO/SampleContextTracker.h     |   6 +-
 .../Transforms/IPO/SampleContextTracker.cpp   |  73 ++-
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 443 ++++++++++++++++--
 .../Inputs/indirect-call-csspgo.prof          |  10 +
 .../SampleProfile/csspgo-inline-debug.ll      | 166 +++++++
 .../SampleProfile/csspgo-inline-icall.ll      |  63 +++
 .../Transforms/SampleProfile/csspgo-inline.ll | 180 +++++++
 .../profile-context-tracker-debug.ll          |  25 +-
 .../SampleProfile/profile-context-tracker.ll  |  15 +-
 .../SampleProfile/pseudo-probe-inline.ll      |   4 +-
 10 files changed, 904 insertions(+), 81 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll
 create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
 create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-inline.ll

diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index 5b2600144fa3..526e141838c4 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -23,6 +23,7 @@
 #include "llvm/ProfileData/SampleProf.h"
 #include <list>
 #include <map>
+#include <vector>
 
 using namespace llvm;
 using namespace sampleprof;
@@ -42,7 +43,7 @@ class ContextTrieNode {
         CallSiteLoc(CallLoc){};
   ContextTrieNode *getChildContext(const LineLocation &CallSite,
                                    StringRef CalleeName);
-  ContextTrieNode *getChildContext(const LineLocation &CallSite);
+  ContextTrieNode *getHottestChildContext(const LineLocation &CallSite);
   ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
                                            StringRef CalleeName,
                                            bool AllowCreate = true);
@@ -94,6 +95,9 @@ class SampleContextTracker {
   // call-site. The full context is identified by location of call instruction.
   FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
                                               StringRef CalleeName);
+  // Get samples for indirect call targets for call site at given location.
+  std::vector<const FunctionSamples *>
+  getIndirectCalleeContextSamplesFor(const DILocation *DIL);
   // Query context profile for a given location. The full context
   // is identified by input DILocation.
   FunctionSamples *getContextSamplesFor(const DILocation *DIL);
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index fad72985dedd..41d7f363e1a4 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -30,7 +30,7 @@ namespace llvm {
 ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
                                                   StringRef CalleeName) {
   if (CalleeName.empty())
-    return getChildContext(CallSite);
+    return getHottestChildContext(CallSite);
 
   uint32_t Hash = nodeHash(CalleeName, CallSite);
   auto It = AllChildContext.find(Hash);
@@ -40,18 +40,22 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
 }
 
 ContextTrieNode *
-ContextTrieNode::getChildContext(const LineLocation &CallSite) {
+ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
   // CSFDO-TODO: This could be slow, change AllChildContext so we can
   // do point look up for child node by call site alone.
-  // CSFDO-TODO: Return the child with max count for indirect call
+  // Retrieve the child node with max count for indirect call
   ContextTrieNode *ChildNodeRet = nullptr;
+  uint64_t MaxCalleeSamples = 0;
   for (auto &It : AllChildContext) {
     ContextTrieNode &ChildNode = It.second;
-    if (ChildNode.CallSiteLoc == CallSite) {
-      if (ChildNodeRet)
-        return nullptr;
-      else
-        ChildNodeRet = &ChildNode;
+    if (ChildNode.CallSiteLoc != CallSite)
+      continue;
+    FunctionSamples *Samples = ChildNode.getFunctionSamples();
+    if (!Samples)
+      continue;
+    if (Samples->getTotalSamples() > MaxCalleeSamples) {
+      ChildNodeRet = &ChildNode;
+      MaxCalleeSamples = Samples->getTotalSamples();
     }
   }
 
@@ -191,12 +195,12 @@ FunctionSamples *
 SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
                                                  StringRef CalleeName) {
   LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n");
-  // CSFDO-TODO: We use CalleeName to differentiate indirect call
-  // We need to get sample for indirect callee too.
   DILocation *DIL = Inst.getDebugLoc();
   if (!DIL)
     return nullptr;
 
+  // For indirect call, CalleeName will be empty, in which case the context
+  // profile for callee with largest total samples will be returned.
   ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName);
   if (CalleeContext) {
     FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
@@ -209,6 +213,26 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
   return nullptr;
 }
 
+std::vector<const FunctionSamples *>
+SampleContextTracker::getIndirectCalleeContextSamplesFor(
+    const DILocation *DIL) {
+  std::vector<const FunctionSamples *> R;
+  if (!DIL)
+    return R;
+
+  ContextTrieNode *CallerNode = getContextFor(DIL);
+  LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+  for (auto &It : CallerNode->getAllChildContext()) {
+    ContextTrieNode &ChildNode = It.second;
+    if (ChildNode.getCallSiteLoc() != CallSite)
+      continue;
+    if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples())
+      R.push_back(CalleeSamples);
+  }
+
+  return R;
+}
+
 FunctionSamples *
 SampleContextTracker::getContextSamplesFor(const DILocation *DIL) {
   assert(DIL && "Expect non-null location");
@@ -295,11 +319,6 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
     const Instruction &Inst, StringRef CalleeName) {
   LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n"
                     << Inst << "\n");
-  // CSFDO-TODO: We also need to promote context profile from indirect
-  // calls. We won't have callee names from those from call instr.
-  if (CalleeName.empty())
-    return;
-
   // Get the caller context for the call instruction, we don't use callee
   // name from call because there can be context from indirect calls too.
   DILocation *DIL = Inst.getDebugLoc();
@@ -309,6 +328,22 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
 
   // Get the context that needs to be promoted
   LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+  // For indirect call, CalleeName will be empty, in which case we need to
+  // promote all non-inlined child context profiles.
+  if (CalleeName.empty()) {
+    for (auto &It : CallerNode->getAllChildContext()) {
+      ContextTrieNode *NodeToPromo = &It.second;
+      if (CallSite != NodeToPromo->getCallSiteLoc())
+        continue;
+      FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples();
+      if (FromSamples && FromSamples->getContext().hasState(InlinedContext))
+        continue;
+      promoteMergeContextSamplesTree(*NodeToPromo);
+    }
+    return;
+  }
+
+  // Get the context for the given callee that needs to be promoted
   ContextTrieNode *NodeToPromo =
       CallerNode->getChildContext(CallSite, CalleeName);
   if (!NodeToPromo)
@@ -328,6 +363,8 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
   LLVM_DEBUG(dbgs() << "  Found context tree root to promote: "
                     << FromSamples->getContext() << "\n");
 
+  assert(!FromSamples->getContext().hasState(InlinedContext) &&
+         "Shouldn't promote inlined context profile");
   StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
   return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
                                         ContextStrToRemove);
@@ -360,14 +397,12 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
                                           StringRef CalleeName) {
   assert(DIL && "Expect non-null location");
 
-  // CSSPGO-TODO: need to support indirect callee
-  if (CalleeName.empty())
-    return nullptr;
-
   ContextTrieNode *CallContext = getContextFor(DIL);
   if (!CallContext)
     return nullptr;
 
+  // When CalleeName is empty, the child context profile with max
+  // total samples will be returned.
   return CallContext->getChildContext(
       FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
 }
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 264ac4065e8c..665c4078f3ee 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -108,6 +109,14 @@ STATISTIC(NumMismatchedProfile,
           "Number of functions with CFG mismatched profile");
 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
 
+STATISTIC(NumCSInlinedHitMinLimit,
+          "Number of functions with FDO inline stopped due to min size limit");
+STATISTIC(NumCSInlinedHitMaxLimit,
+          "Number of functions with FDO inline stopped due to max size limit");
+STATISTIC(
+    NumCSInlinedHitGrowthLimit,
+    "Number of functions with FDO inline stopped due to growth size limit");
+
 // Command line option to specify the file to read samples from. This is
 // mainly used for debugging.
 static cl::opt<std::string> SampleProfileFile(
@@ -171,6 +180,38 @@ static cl::opt<bool> ProfileSizeInline(
     cl::desc("Inline cold call sites in profile loader if it's beneficial "
              "for code size."));
 
+static cl::opt<int> ProfileInlineGrowthLimit(
+    "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
+    cl::desc("The size growth ratio limit for priority-based sample profile "
+             "loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMin(
+    "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
+    cl::desc("The lower bound of size growth limit for "
+             "priority-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMax(
+    "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
+    cl::desc("The upper bound of size growth limit for "
+             "priority-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileICPThreshold(
+    "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
+    cl::desc(
+        "Relative hotness threshold for indirect "
+        "call promotion in priority-based sample profile loader inlining."));
+
+static cl::opt<int> SampleHotCallSiteThreshold(
+    "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
+    cl::desc("Hot callsite threshold for priority-based sample profile loader "
+             "inlining."));
+
+static cl::opt<bool> CallsitePrioritizedInline(
+    "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
+    cl::init(false),
+    cl::desc("Use call site prioritized inlining for sample profile loader. "
+             "Currently only CSSPGO is supported."));
+
 static cl::opt<int> SampleColdCallSiteThreshold(
     "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
     cl::desc("Threshold for inlining cold callsites"));
@@ -313,6 +354,31 @@ class GUIDToFuncNameMapper {
   DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
 };
 
+// Inline candidate used by iterative callsite prioritized inliner
+struct InlineCandidate {
+  CallBase *CallInstr;                  // Call site considered for inlining
+  const FunctionSamples *CalleeSamples; // Profile found for the callee
+  uint64_t CallsiteCount;               // Weight used to rank the candidate
+};
+
+// Inline candidate comparer: orders by call site weight, then by callee GUID.
+struct CandidateComparer {
+  bool operator()(const InlineCandidate &LHS,
+                  const InlineCandidate &RHS) const {
+    if (LHS.CallsiteCount != RHS.CallsiteCount)
+      return LHS.CallsiteCount < RHS.CallsiteCount;
+    // Tie breaker using GUID so we have stable/deterministic inlining order
+    assert(LHS.CalleeSamples && RHS.CalleeSamples &&
+           "Expect non-null FunctionSamples");
+    return FunctionSamples::getGUID(LHS.CalleeSamples->getName()) <
+           FunctionSamples::getGUID(RHS.CalleeSamples->getName());
+  }
+};
+
+using CandidateQueue =
+    PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
+                  CandidateComparer>;
+
 /// Sample profile pass.
 ///
 /// This pass reads profile data from the file specified by
@@ -350,9 +416,23 @@ class SampleProfileLoader {
   findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
   mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
-  bool inlineCallInstruction(CallBase &CB);
+  CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName,
+                                   uint64_t &Sum, uint64_t Count, CallBase *I,
+                                   const char *&Reason);
+  bool inlineCallInstruction(CallBase &CB,
+                             const FunctionSamples *CalleeSamples);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+  // Helper functions for the call-site prioritized BFS inliner.
+  // Will change the main FDO inliner to be work list based directly in
+  // upstream, then merge this change with that and remove the duplication.
+  InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
+  bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
+  bool tryInlineCandidate(InlineCandidate &Candidate,
+                          SmallVector<CallBase *, 8> &InlinedCallSites);
+  bool
+  inlineHotFunctionsWithPriority(Function &F,
+                                 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
   // Inline cold/small functions in addition to hot ones
   bool shouldInlineColdCallee(CallBase &CallInst);
   void emitOptimizationRemarksForInlineCandidates(
@@ -918,6 +998,31 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
     return R;
   }
 
+  auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
+    assert(L && R && "Expect non-null FunctionSamples");
+    if (L->getEntrySamples() != R->getEntrySamples())
+      return L->getEntrySamples() > R->getEntrySamples();
+    return FunctionSamples::getGUID(L->getName()) <
+           FunctionSamples::getGUID(R->getName());
+  };
+
+  if (ProfileIsCS) {
+    auto CalleeSamples =
+        ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
+    if (CalleeSamples.empty())
+      return R;
+
+    // For CSSPGO, we only use target context profile's entry count
+    // as that already includes both inlined callee and non-inlined ones.
+    Sum = 0;
+    for (const auto *const FS : CalleeSamples) {
+      Sum += FS->getEntrySamples();
+      R.push_back(FS);
+    }
+    llvm::sort(R, FSCompare);
+    return R;
+  }
+
   const FunctionSamples *FS = findFunctionSamples(Inst);
   if (FS == nullptr)
     return R;
@@ -935,12 +1040,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
       Sum += NameFS.second.getEntrySamples();
       R.push_back(&NameFS.second);
     }
-    llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
-      if (L->getEntrySamples() != R->getEntrySamples())
-        return L->getEntrySamples() > R->getEntrySamples();
-      return FunctionSamples::getGUID(L->getName()) <
-             FunctionSamples::getGUID(R->getName());
-    });
+    llvm::sort(R, FSCompare);
   }
   return R;
 }
@@ -977,7 +1077,32 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
   return it.first->second;
 }
 
-bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
+CallBase *
+SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName,
+                                            uint64_t &Sum, uint64_t Count,
+                                            CallBase *I, const char *&Reason) {
+  Reason = "Callee function not available";
+  // Tries to promote indirect call I to CalleeName; returns nullptr (with
+  // Reason set) when the callee is unavailable or promotion is not legal.
+  // The `R->getValue() != &F` check rejects recursive calls: inlining them
+  // could bloat the code exponentially. There is a way to handle this better
+  // (clone the caller first and inline the clone), but as llvm does not
+  // inline recursive calls, we simply ignore them here.
+  auto R = SymbolMap.find(CalleeName);
+  if (R != SymbolMap.end() && R->getValue() &&
+      !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
+      R->getValue()->hasFnAttribute("use-sample-profile") &&
+      R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) {
+    auto *DI =
+        &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE);
+    Sum -= Count; // Count has been attributed to the promoted target.
+    return DI;
+  }
+  return nullptr;
+}
+
+bool SampleProfileLoader::inlineCallInstruction(
+    CallBase &CB, const FunctionSamples *CalleeSamples) {
   if (ExternalInlineAdvisor) {
     auto Advice = ExternalInlineAdvisor->getAdvice(CB);
     if (!Advice->isInliningRecommended()) {
@@ -1012,6 +1137,9 @@ bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
     // The call to InlineFunction erases I, so we can't pass it here.
     emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
                     true, CSINLINE_DEBUG);
+    if (ProfileIsCS)
+      ContextTracker->markContextSamplesInlined(CalleeSamples);
+    ++NumCSInlined;
     return true;
   }
   return false;
@@ -1129,34 +1257,17 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          const char *Reason = "Callee function not available";
-          // R->getValue() != &F is to prevent promoting a recursive call.
-          // If it is a recursive call, we do not inline it as it could bloat
-          // the code exponentially. There is way to better handle this, e.g.
-          // clone the caller first, and inline the cloned caller if it is
-          // recursive. As llvm does not inline recursive calls, we will
-          // simply ignore it instead of handling it explicitly.
+          const char *Reason = nullptr;
           auto CalleeFunctionName = FS->getFuncName();
-          auto R = SymbolMap.find(CalleeFunctionName);
-          if (R != SymbolMap.end() && R->getValue() &&
-              !R->getValue()->isDeclaration() &&
-              R->getValue()->getSubprogram() &&
-              R->getValue()->hasFnAttribute("use-sample-profile") &&
-              R->getValue() != &F &&
-              isLegalToPromote(*I, R->getValue(), &Reason)) {
-            uint64_t C = FS->getEntrySamples();
-            auto &DI =
-                pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE);
-            Sum -= C;
+          if (CallBase *DI =
+                  tryPromoteIndirectCall(F, CalleeFunctionName, Sum,
+                                         FS->getEntrySamples(), I, Reason)) {
             PromotedInsns.insert(I);
             // If profile mismatches, we should not attempt to inline DI.
             if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-                inlineCallInstruction(cast<CallBase>(DI))) {
-              if (ProfileIsCS)
-                ContextTracker->markContextSamplesInlined(FS);
+                inlineCallInstruction(cast<CallBase>(*DI), FS)) {
               localNotInlinedCallSites.erase(I);
               LocalChanged = true;
-              ++NumCSInlined;
             }
           } else {
             LLVM_DEBUG(dbgs()
@@ -1166,13 +1277,11 @@ bool SampleProfileLoader::inlineHotFunctions(
         }
       } else if (CalledFunction && CalledFunction->getSubprogram() &&
                  !CalledFunction->isDeclaration()) {
-        if (inlineCallInstruction(*I)) {
-          if (ProfileIsCS)
-            ContextTracker->markContextSamplesInlined(
-                localNotInlinedCallSites[I]);
+        if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I)
+                                          ? localNotInlinedCallSites[I]
+                                          : nullptr)) {
           localNotInlinedCallSites.erase(I);
           LocalChanged = true;
-          ++NumCSInlined;
         }
       } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
         findCalleeFunctionSamples(*I)->findInlinedFunctions(
@@ -1186,6 +1295,11 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
   }
 
+  // For CS profile, profile for not inlined context will be merged when
+  // base profile is being retrieved
+  if (ProfileIsCS)
+    return Changed;
+
   // Accumulate not inlined callsite information into notInlinedSamples
   for (const auto &Pair : localNotInlinedCallSites) {
     CallBase *I = Pair.getFirst();
@@ -1232,6 +1346,254 @@ bool SampleProfileLoader::inlineHotFunctions(
   return Changed;
 }
 
+bool SampleProfileLoader::tryInlineCandidate(
+    InlineCandidate &Candidate, SmallVector<CallBase *, 8> &InlinedCallSites) {
+  // Inline Candidate if the cost check passes; true when it was inlined.
+  CallBase &CB = *Candidate.CallInstr;
+  Function *CalledFunction = CB.getCalledFunction();
+  assert(CalledFunction && "Expect a callee with definition");
+  DebugLoc DLoc = CB.getDebugLoc();
+  BasicBlock *BB = CB.getParent();
+
+  InlineCost Cost = shouldInlineCandidate(Candidate);
+  if (Cost.isNever()) {
+    ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
+              << "incompatible inlining");
+    return false;
+  }
+
+  if (!Cost)
+    return false;
+
+  InlineFunctionInfo IFI(nullptr, GetAC);
+  if (InlineFunction(CB, IFI).isSuccess()) {
+    // InlineFunction erased the call, so use the DLoc/BB captured earlier.
+    emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
+                    true, CSINLINE_DEBUG);
+
+    // Now populate the list of newly exposed call sites.
+    InlinedCallSites.clear();
+    for (auto &I : IFI.InlinedCallSites)
+      InlinedCallSites.push_back(I);
+
+    if (ProfileIsCS)
+      ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
+    ++NumCSInlined;
+    return true;
+  }
+  return false;
+}
+
+bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
+                                             CallBase *CB) {
+  // Fills *NewCandidate and returns true when CB has a callee profile.
+  assert(CB && "Expect non-null call instruction");
+  if (isa<IntrinsicInst>(CB))
+    return false;
+
+  // Find the callee's profile. For indirect call, find hottest target profile.
+  const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
+  if (!CalleeSamples)
+    return false;
+
+  uint64_t CallsiteCount = 0;
+  ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
+  if (Weight)
+    CallsiteCount = Weight.get();
+  // CalleeSamples is non-null here, so its entry count always participates.
+  CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples());
+
+  *NewCandidate = {CB, CalleeSamples, CallsiteCount};
+  return true;
+}
+
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
+  // Decide whether Candidate should be inlined; an external advisor wins.
+  std::unique_ptr<InlineAdvice> Advice = nullptr;
+  if (ExternalInlineAdvisor) {
+    Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
+    if (!Advice->isInliningRecommended()) {
+      Advice->recordUnattemptedInlining();
+      return InlineCost::getNever("not previously inlined");
+    }
+    Advice->recordInlining();
+    return InlineCost::getAlways("previously inlined");
+  }
+
+  // Adjust threshold based on call site hotness, only do this for callsite
+  // prioritized inliner because otherwise cost-benefit check is done earlier.
+  int SampleThreshold = SampleColdCallSiteThreshold;
+  if (CallsitePrioritizedInline) {
+    if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
+      SampleThreshold = SampleHotCallSiteThreshold;
+    else if (!ProfileSizeInline)
+      return InlineCost::getNever("cold callsite");
+  }
+
+  Function *Callee = Candidate.CallInstr->getCalledFunction();
+  assert(Callee && "Expect a definition for inline candidate of direct call");
+
+  InlineParams Params = getInlineParams();
+  Params.ComputeFullInlineCost = true;
+  // Checks if there is anything in the reachable portion of the callee at
+  // this callsite that makes this inlining potentially illegal. Need to
+  // set ComputeFullInlineCost, otherwise getInlineCost may return early
+  // when cost exceeds threshold without checking all IRs in the callee.
+  // For the old FDO inliner the actual cost does not matter because we only
+  // check isNever() to see if it is legal to inline the callsite.
+  InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
+                                  GetTTI(*Callee), GetAC, GetTLI);
+
+  // For old FDO inliner, we inline the call site as long as cost is not
+  // "Never". The cost-benefit check is done earlier.
+  if (!CallsitePrioritizedInline) {
+    if (Cost.isNever())
+      return Cost;
+    return InlineCost::getAlways("hot callsite previously inlined");
+  }
+
+  // Honor always inline and never inline from call analyzer
+  if (Cost.isNever() || Cost.isAlways())
+    return Cost;
+
+  // Otherwise only use the cost from call analyzer, but overwrite threshold
+  // with Sample PGO threshold.
+  return InlineCost::get(Cost.getCost(), SampleThreshold);
+}
+
+bool SampleProfileLoader::inlineHotFunctionsWithPriority(
+    Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+  // Inline into F in call site weight order; true if anything was inlined.
+  DenseSet<Instruction *> PromotedInsns;
+  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
+  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+  // Profile symbol list is ignored when profile-sample-accurate is on.
+  assert((!ProfAccForSymsInList ||
+          (!ProfileSampleAccurate &&
+           !F.hasFnAttribute("profile-sample-accurate"))) &&
+         "ProfAccForSymsInList should be false when profile-sample-accurate "
+         "is enabled");
+
+  // Populating worklist with initial call sites from root inliner, along
+  // with call site weights.
+  CandidateQueue CQueue;
+  InlineCandidate NewCandidate;
+  for (auto &BB : F) {
+    for (auto &I : BB.getInstList()) {
+      auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB)
+        continue;
+      if (getInlineCandidate(&NewCandidate, CB))
+        CQueue.push(NewCandidate);
+    }
+  }
+
+  // Cap the size growth from profile guided inlining. This is needed even
+  // though cost of each inline candidate already accounts for callee size,
+  // because with top-down inlining, we can grow inliner size significantly
+  // with large number of smaller inlinees each pass the cost check.
+  assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&
+         "Max inline size limit should not be smaller than min inline size "
+         "limit.");
+  unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
+  SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
+  SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
+  if (ExternalInlineAdvisor)
+    SizeLimit = std::numeric_limits<unsigned>::max();
+
+  // Perform iterative BFS call site prioritized inlining
+  bool Changed = false;
+  while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
+    InlineCandidate Candidate = CQueue.top();
+    CQueue.pop();
+    CallBase *I = Candidate.CallInstr;
+    Function *CalledFunction = I->getCalledFunction();
+
+    if (CalledFunction == &F)
+      continue;
+    if (I->isIndirectCall()) {
+      if (PromotedInsns.count(I))
+        continue;
+      uint64_t Sum = 0; // The callee may return early without setting Sum.
+      auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
+      uint64_t SumOrigin = Sum;
+      for (const auto *FS : CalleeSamples) {
+        // TODO: Consider disable pre-LTO ICP for MonoLTO as well
+        if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+          FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+                                   PSI->getOrCompHotCountThreshold());
+          continue;
+        }
+        uint64_t EntryCountDistributed = FS->getEntrySamples();
+        // In addition to regular inline cost check, we also need to make sure
+        // ICP isn't introducing excessive speculative checks even if individual
+        // target looks beneficial to promote and inline. That means we should
+        // only do ICP when there's a small number dominant targets.
+        if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
+          break;
+        // TODO: Fix CallAnalyzer to handle all indirect calls.
+        // For indirect call, we don't run CallAnalyzer to get InlineCost
+        // before actual inlining. This is because we could see two different
+        // types from the same definition, which makes CallAnalyzer choke as
+        // it's expecting matching parameter type on both caller and callee
+        // side. See example from PR18962 for the triggering cases (the bug was
+        // fixed, but we generate different types).
+        if (!PSI->isHotCount(EntryCountDistributed))
+          break;
+        const char *Reason = nullptr;
+        auto CalleeFunctionName = FS->getFuncName();
+        if (CallBase *DI = tryPromoteIndirectCall(
+                F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) {
+          // Attach function profile for promoted indirect callee, and update
+          // call site count for the promoted inline candidate too.
+          Candidate = {DI, FS, EntryCountDistributed};
+          PromotedInsns.insert(I);
+          SmallVector<CallBase *, 8> InlinedCallSites;
+          // If profile mismatches, we should not attempt to inline DI.
+          if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
+              tryInlineCandidate(Candidate, InlinedCallSites)) {
+            for (auto *CB : InlinedCallSites) {
+              if (getInlineCandidate(&NewCandidate, CB))
+                CQueue.emplace(NewCandidate);
+            }
+            Changed = true;
+          }
+        } else {
+          LLVM_DEBUG(dbgs()
+                     << "\nFailed to promote indirect call to "
+                     << CalleeFunctionName << " because " << Reason << "\n");
+        }
+      }
+    } else if (CalledFunction && CalledFunction->getSubprogram() &&
+               !CalledFunction->isDeclaration()) {
+      SmallVector<CallBase *, 8> InlinedCallSites;
+      if (tryInlineCandidate(Candidate, InlinedCallSites)) {
+        for (auto *CB : InlinedCallSites) {
+          if (getInlineCandidate(&NewCandidate, CB))
+            CQueue.emplace(NewCandidate);
+        }
+        Changed = true;
+      }
+    } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+      findCalleeFunctionSamples(*I)->findInlinedFunctions(
+          InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
+    }
+  }
+
+  if (!CQueue.empty()) {
+    if (SizeLimit == (unsigned)ProfileInlineLimitMax)
+      ++NumCSInlinedHitMaxLimit;
+    else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
+      ++NumCSInlinedHitMinLimit;
+    else
+      ++NumCSInlinedHitGrowthLimit;
+  }
+
+  return Changed;
+}
+
 /// Find equivalence classes for the given block.
 ///
 /// This finds all the blocks that are guaranteed to execute the same
@@ -1833,7 +2195,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
   }
 
   DenseSet<GlobalValue::GUID> InlinedGUIDs;
-  Changed |= inlineHotFunctions(F, InlinedGUIDs);
+  if (ProfileIsCS && CallsitePrioritizedInline)
+    Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
+  else
+    Changed |= inlineHotFunctions(F, InlinedGUIDs);
 
   // Compute basic block weights.
   Changed |= computeBlockWeights(F);
@@ -1978,6 +2343,12 @@ bool SampleProfileLoader::doInitialization(Module &M,
     ProfileIsCS = true;
     FunctionSamples::ProfileIsCS = true;
 
+    // Enable priority-based inliner and size inline by default for CSSPGO.
+    if (!ProfileSizeInline.getNumOccurrences())
+      ProfileSizeInline = true;
+    if (!CallsitePrioritizedInline.getNumOccurrences())
+      CallsitePrioritizedInline = true;
+
     // Tracker for profiles under different context
     ContextTracker =
         std::make_unique<SampleContextTracker>(Reader->getProfiles());
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof
new file mode 100644
index 000000000000..095c7a1fc480
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call-csspgo.prof
@@ -0,0 +1,10 @@
+[test]:63067:0
+ 1: 3345 _Z3barv:1398 _Z3foov:2059
+ 2: 100 _Z3bazv:102
+ 3: 100 _Z3zoov:102
+[test:1 @ _Z3barv]:200:100
+ 1: 100
+[test:1 @ _Z3foov]:4220:1200
+ 14: 4220
+[test:2 @ _Z3bazv]:200:100
+ 5: 100
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll
new file mode 100644
index 000000000000..e5f2f7571eaf
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-debug.ll
@@ -0,0 +1,166 @@
+; REQUIRES: asserts
+; Test that the new FDO inliner using a priority queue will not visit the same call site again and again.
+; Use debug prints as repeated call site evaluation is not visible from final inline decision.
+
+; Note that we need new pass manager to enable top-down processing for sample profile loader
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=OLD-INLINE
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=1 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=NEW-INLINE
+
+; Old inliner will evaluate the same call site three times
+; OLD-INLINE:      Getting callee context for instr:   %call = tail call i32 @_Z5funcBi
+; OLD-INLINE-NEXT:   Callee context found: main:3.1 @ _Z5funcBi
+; OLD-INLINE:      Getting callee context for instr:   %call = tail call i32 @_Z5funcBi
+; OLD-INLINE-NEXT:   Callee context found: main:3.1 @ _Z5funcBi
+; OLD-INLINE:      Getting callee context for instr:   %call = tail call i32 @_Z5funcBi
+; OLD-INLINE-NEXT:   Callee context found: main:3.1 @ _Z5funcBi
+
+; New inliner only evaluates the same call site once
+; NEW-INLINE:      Getting callee context for instr:   %call = tail call i32 @_Z5funcBi
+; NEW-INLINE-NEXT:   Callee context found: main:3.1 @ _Z5funcBi
+; NEW-INLINE-NOT:  Getting callee context for instr:   %call = tail call i32 @_Z5funcBi
+; NEW-INLINE-NOT:    Callee context found: main:3.1 @ _Z5funcBi
+
+@factor = dso_local global i32 3, align 4, !dbg !0
+
+define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
+entry:
+  br label %for.body, !dbg !25
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add3, !dbg !27
+
+for.body:                                         ; preds = %for.body, %entry
+  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
+  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
+  %add = add nuw nsw i32 %x.011, 1, !dbg !31
+  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
+  %add2 = add i32 %call, %r.010, !dbg !34
+  %add3 = add i32 %add2, %call1, !dbg !35
+  %dec = add nsw i32 %x.011, -1, !dbg !36
+  %cmp = icmp eq i32 %x.011, 0, !dbg !38
+  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
+}
+
+define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
+entry:
+  %add = add nsw i32 %x, 100000, !dbg !44
+  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
+  ret i32 %call, !dbg !46
+}
+
+define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
+entry:
+  %cmp = icmp sgt i32 %x, 0, !dbg !57
+  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
+
+while.cond2.preheader:                            ; preds = %entry
+  %cmp313 = icmp slt i32 %x, 0, !dbg !60
+  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
+
+while.body:                                       ; preds = %while.body, %entry
+  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
+  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
+  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
+  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
+  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
+  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
+
+while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
+  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
+  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
+  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
+  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
+  %cmp3 = icmp slt i32 %add, 0, !dbg !60
+  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
+
+if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
+  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
+  ret i32 %x.addr.2, !dbg !76
+}
+
+define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
+entry:
+  %sub = add nsw i32 %x, -100000, !dbg !51
+  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
+  ret i32 %call, !dbg !53
+}
+
+declare i32 @_Z3fibi(i32)
+
+attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
+!4 = !{}
+!5 = !{!6, !10, !11}
+!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!12 = !{!0}
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
+!14 = !{i32 7, !"Dwarf Version", i32 4}
+!15 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !{i32 1, !"wchar_size", i32 4}
+!17 = !{!"clang version 11.0.0"}
+!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!9}
+!21 = !{!22, !23}
+!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
+!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
+!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
+!25 = !DILocation(line: 13, column: 3, scope: !26)
+!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
+!27 = !DILocation(line: 17, column: 3, scope: !18)
+!28 = !DILocation(line: 14, column: 10, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
+!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
+!31 = !DILocation(line: 14, column: 29, scope: !29)
+!32 = !DILocation(line: 14, column: 21, scope: !33)
+!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
+!34 = !DILocation(line: 14, column: 19, scope: !29)
+!35 = !DILocation(line: 14, column: 7, scope: !29)
+!36 = !DILocation(line: 13, column: 33, scope: !37)
+!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
+!38 = !DILocation(line: 13, column: 26, scope: !39)
+!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
+!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!44 = !DILocation(line: 27, column: 22, scope: !40)
+!45 = !DILocation(line: 27, column: 11, scope: !40)
+!46 = !DILocation(line: 29, column: 3, scope: !40)
+!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!51 = !DILocation(line: 33, column: 22, scope: !47)
+!52 = !DILocation(line: 33, column: 11, scope: !47)
+!53 = !DILocation(line: 35, column: 3, scope: !47)
+!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!57 = !DILocation(line: 49, column: 9, scope: !58)
+!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
+!59 = !DILocation(line: 49, column: 7, scope: !54)
+!60 = !DILocation(line: 58, column: 14, scope: !61)
+!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
+!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
+!63 = !DILocation(line: 58, column: 5, scope: !61)
+!64 = !DILocation(line: 52, column: 16, scope: !65)
+!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
+!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
+!67 = !DILocation(line: 52, column: 12, scope: !65)
+!68 = !DILocation(line: 52, column: 9, scope: !65)
+!69 = !DILocation(line: 51, column: 14, scope: !70)
+!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
+!71 = !DILocation(line: 51, column: 5, scope: !70)
+!72 = !DILocation(line: 59, column: 16, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
+!74 = !DILocation(line: 59, column: 12, scope: !73)
+!75 = !DILocation(line: 59, column: 9, scope: !73)
+!76 = !DILocation(line: 63, column: 3, scope: !54)
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
new file mode 100644
index 000000000000..3ec64326da2d
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
@@ -0,0 +1,63 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-threshold=100 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s
+
+define void @test(void ()*) #0 !dbg !3 {
+;; Add direct calls to force top-down order for sample profile loader
+  call void @_Z3foov(), !dbg !7
+  call void @_Z3barv(), !dbg !7
+  call void @_Z3bazv(), !dbg !7
+  %2 = alloca void ()*
+  store void ()* %0, void ()** %2
+  %3 = load void ()*, void ()** %2
+  call void %3(), !dbg !4
+  %4 = alloca void ()*
+  store void ()* %0, void ()** %4
+  %5 = load void ()*, void ()** %4
+  call void %5(), !dbg !5
+  ret void
+}
+
+define void @_Z3foov() #0 !dbg !8 {
+  ret void
+}
+
+define void @_Z3barv() #0 !dbg !9 {
+  ret void
+}
+
+define void @_Z3bazv() #0 !dbg !10 {
+  ret void
+}
+
+define void @_Z3zoov() #0 !dbg !11 {
+  ret void
+}
+
+attributes #0 = {"use-sample-profile"}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1)
+!1 = !DIFile(filename: "test.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, unit: !0)
+!4 = !DILocation(line: 4, scope: !3)
+!5 = !DILocation(line: 5, scope: !3)
+!6 = !DILocation(line: 6, scope: !3)
+!7 = !DILocation(line: 7, scope: !3)
+!8 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 29, unit: !0)
+!9 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 32, unit: !0)
+!10 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 24, unit: !0)
+!11 = distinct !DISubprogram(name: "zoo", linkageName: "_Z3zoov", scope: !1, file: !1, line: 24, unit: !0)
+
+
+; ICP-ALL: remark: test.cc:5:0: _Z3bazv inlined into test
+; ICP-ALL-NEXT: remark: test.cc:4:0: _Z3foov inlined into test
+; ICP-ALL-NEXT: remark: test.cc:4:0: _Z3barv inlined into test
+; ICP-ALL-NOT: remark
+
+; ICP-HOT: remark: test.cc:4:0: _Z3foov inlined into test
+; ICP-HOT-NOT: remark
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
new file mode 100644
index 000000000000..14e916d8c2e8
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
@@ -0,0 +1,180 @@
+; Test for CSSPGO's new early inliner using priority queue
+
+; Note that we need new pass manager to enable top-down processing for sample profile loader
+; Test we inlined the following in top-down order with old inliner
+;   main:3 @ _Z5funcAi
+;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
+;   _Z5funcBi:1 @ _Z8funcLeafi
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
+;
+; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
+;
+; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
+;
+; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning cold sample profile inline threshold can get us the same inlining
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
+;
+; With new FDO early inliner and tuned cutoff, we can control inlining through size growth tuning knob.
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2
+
+
+; INLINE-BASE: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
+; INLINE-BASE: remark: merged.cpp:27:11: _Z8funcLeafi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10
+; INLINE-BASE: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
+
+; INLINE-NEW: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
+; INLINE-NEW-NOT: remark
+
+; INLINE-NEW-LIMIT1-NOT: remark
+
+; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: _Z8funcLeafi inlined into _Z5funcAi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11
+; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
+; INLINE-NEW-LIMIT2-NOT: remark
+
+@factor = dso_local global i32 3, align 4, !dbg !0
+
+define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
+entry:
+  br label %for.body, !dbg !25
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add3, !dbg !27
+
+for.body:                                         ; preds = %for.body, %entry
+  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
+  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
+  %add = add nuw nsw i32 %x.011, 1, !dbg !31
+  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
+  %add2 = add i32 %call, %r.010, !dbg !34
+  %add3 = add i32 %add2, %call1, !dbg !35
+  %dec = add nsw i32 %x.011, -1, !dbg !36
+  %cmp = icmp eq i32 %x.011, 0, !dbg !38
+  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
+}
+
+define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
+entry:
+  %add = add nsw i32 %x, 100000, !dbg !44
+  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
+  ret i32 %call, !dbg !46
+}
+
+define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
+entry:
+  %cmp = icmp sgt i32 %x, 0, !dbg !57
+  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
+
+while.cond2.preheader:                            ; preds = %entry
+  %cmp313 = icmp slt i32 %x, 0, !dbg !60
+  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
+
+while.body:                                       ; preds = %while.body, %entry
+  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
+  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
+  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
+  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
+  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
+  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
+
+while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
+  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
+  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
+  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
+  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
+  %cmp3 = icmp slt i32 %add, 0, !dbg !60
+  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
+
+if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
+  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
+  ret i32 %x.addr.2, !dbg !76
+}
+
+define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
+entry:
+  %sub = add nsw i32 %x, -100000, !dbg !51
+  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
+  ret i32 %call, !dbg !53
+}
+
+declare i32 @_Z3fibi(i32)
+
+attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
+!4 = !{}
+!5 = !{!6, !10, !11}
+!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!12 = !{!0}
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
+!14 = !{i32 7, !"Dwarf Version", i32 4}
+!15 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !{i32 1, !"wchar_size", i32 4}
+!17 = !{!"clang version 11.0.0"}
+!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!9}
+!21 = !{!22, !23}
+!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
+!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
+!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
+!25 = !DILocation(line: 13, column: 3, scope: !26)
+!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
+!27 = !DILocation(line: 17, column: 3, scope: !18)
+!28 = !DILocation(line: 14, column: 10, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
+!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
+!31 = !DILocation(line: 14, column: 29, scope: !29)
+!32 = !DILocation(line: 14, column: 21, scope: !33)
+!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
+!34 = !DILocation(line: 14, column: 19, scope: !29)
+!35 = !DILocation(line: 14, column: 7, scope: !29)
+!36 = !DILocation(line: 13, column: 33, scope: !37)
+!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
+!38 = !DILocation(line: 13, column: 26, scope: !39)
+!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
+!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!44 = !DILocation(line: 27, column: 22, scope: !40)
+!45 = !DILocation(line: 27, column: 11, scope: !40)
+!46 = !DILocation(line: 29, column: 3, scope: !40)
+!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!51 = !DILocation(line: 33, column: 22, scope: !47)
+!52 = !DILocation(line: 33, column: 11, scope: !47)
+!53 = !DILocation(line: 35, column: 3, scope: !47)
+!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!57 = !DILocation(line: 49, column: 9, scope: !58)
+!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
+!59 = !DILocation(line: 49, column: 7, scope: !54)
+!60 = !DILocation(line: 58, column: 14, scope: !61)
+!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
+!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
+!63 = !DILocation(line: 58, column: 5, scope: !61)
+!64 = !DILocation(line: 52, column: 16, scope: !65)
+!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
+!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
+!67 = !DILocation(line: 52, column: 12, scope: !65)
+!68 = !DILocation(line: 52, column: 9, scope: !65)
+!69 = !DILocation(line: 51, column: 14, scope: !70)
+!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
+!71 = !DILocation(line: 51, column: 5, scope: !70)
+!72 = !DILocation(line: 59, column: 16, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
+!74 = !DILocation(line: 59, column: 12, scope: !73)
+!75 = !DILocation(line: 59, column: 9, scope: !73)
+!76 = !DILocation(line: 63, column: 3, scope: !54)
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll
index 1a7a53457a5d..7789e18b394a 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker-debug.ll
@@ -3,11 +3,11 @@
 ; based on inline decision, so post inline counts are accurate.
 
 ; Note that we need new pass manager to enable top-down processing for sample profile loader
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-prioritized-inline=0 -sample-profile-inline-size=0 -debug-only=sample-context-tracker -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-HOT
 
 
-; Testwe we inlined the following in top-down order and promot rest not inlined context profile into base profile
+; Test we inlined the following in top-down order and promote the rest of the not-inlined context profiles into the base profile
 ;   main:3 @ _Z5funcAi
 ;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
 ;   _Z5funcBi:1 @ _Z8funcLeafi
@@ -20,13 +20,9 @@
 ; INLINE-ALL-NEXT: Getting callee context for instr:   %call1 = tail call i32 @_Z5funcAi
 ; INLINE-ALL-NEXT:   Callee context found: main:3 @ _Z5funcAi
 ; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi
-; INLINE-ALL-NEXT: Getting callee context for instr:   %call = tail call i32 @_Z5funcBi(
-; INLINE-ALL-NEXT:   Callee context found: main:3.1 @ _Z5funcBi
 ; INLINE-ALL-NEXT: Getting callee context for instr:   %call.i = tail call i32 @_Z8funcLeafi
 ; INLINE-ALL-NEXT:   Callee context found: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
 ; INLINE-ALL-NEXT: Marking context profile as inlined: main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
-; INLINE-ALL-NEXT: Getting callee context for instr:   %call = tail call i32 @_Z5funcBi
-; INLINE-ALL-NEXT:   Callee context found: main:3.1 @ _Z5funcBi
 ; INLINE-ALL-NEXT: Getting callee context for instr:   %call.i1 = tail call i32 @_Z3fibi
 ; INLINE-ALL-NEXT: Getting callee context for instr:   %call5.i = tail call i32 @_Z3fibi
 ; INLINE-ALL-NEXT: Getting base profile for function: _Z5funcAi
@@ -48,24 +44,23 @@
 ; INLINE-ALL-NEXT: Getting base profile for function: _Z8funcLeafi
 ; INLINE-ALL-NEXT:   Merging context profile into base profile: _Z8funcLeafi
 
-; Testwe we inlined the following in top-down order and promot rest not inlined context profile into base profile
-;   main:3 @ _Z5funcAi
+; Test we inlined the following in top-down order and promote the rest of the not-inlined context profiles into the base profile
 ;   _Z5funcAi:1 @ _Z8funcLeafi
 ;   _Z5funcBi:1 @ _Z8funcLeafi
 ; INLINE-HOT:      Getting base profile for function: main
 ; INLINE-HOT-NEXT:   Merging context profile into base profile: main
 ; INLINE-HOT-NEXT:   Found context tree root to promote: external:12 @ main
 ; INLINE-HOT-NEXT:   Context promoted and merged to: main
-; INLINE-HOT-NEXT: Getting callee context for instr:   %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !58
+; INLINE-HOT-NEXT: Getting callee context for instr:   %call = tail call i32 @_Z5funcBi
 ; INLINE-HOT-NEXT:   Callee context found: main:3.1 @ _Z5funcBi
-; INLINE-HOT-NEXT: Getting callee context for instr:   %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !63
+; INLINE-HOT-NEXT: Getting callee context for instr:   %call1 = tail call i32 @_Z5funcAi
 ; INLINE-HOT-NEXT:   Callee context found: main:3 @ _Z5funcAi
 ; INLINE-HOT-NEXT: Getting base profile for function: _Z5funcAi
 ; INLINE-HOT-NEXT:   Merging context profile into base profile: _Z5funcAi
 ; INLINE-HOT-NEXT:   Found context tree root to promote: main:3 @ _Z5funcAi
 ; INLINE-HOT-NEXT:   Context promoted to: _Z5funcAi
 ; INLINE-HOT-NEXT:   Context promoted to: _Z5funcAi:1 @ _Z8funcLeafi
-; INLINE-HOT-NEXT: Getting callee context for instr:   %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50
+; INLINE-HOT-NEXT:   Getting callee context for instr:   %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !50
 ; INLINE-HOT-NEXT:   Callee context found: _Z5funcAi:1 @ _Z8funcLeafi
 ; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcAi:1 @ _Z8funcLeafi
 ; INLINE-HOT-NEXT: Getting callee context for instr:   %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62
@@ -79,11 +74,11 @@
 ; INLINE-HOT-NEXT:   Context promoted to: _Z5funcBi:1 @ _Z8funcLeafi
 ; INLINE-HOT-NEXT:   Found context tree root to promote: externalA:17 @ _Z5funcBi
 ; INLINE-HOT-NEXT:   Context promoted and merged to: _Z5funcBi
-; INLINE-HOT-NEXT: Getting callee context for instr:   %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !50
+; INLINE-HOT-NEXT: Getting callee context for instr:   %call = tail call i32 @_Z8funcLeafi
 ; INLINE-HOT-NEXT:   Callee context found: _Z5funcBi:1 @ _Z8funcLeafi
 ; INLINE-HOT-NEXT: Marking context profile as inlined: _Z5funcBi:1 @ _Z8funcLeafi
-; INLINE-HOT-NEXT: Getting callee context for instr:   %call.i = tail call i32 @_Z3fibi(i32 %tmp.i) #2, !dbg !62
-; INLINE-HOT-NEXT: Getting callee context for instr:   %call5.i = tail call i32 @_Z3fibi(i32 %tmp1.i) #2, !dbg !69
+; INLINE-HOT-NEXT: Getting callee context for instr:   %call.i = tail call i32 @_Z3fibi
+; INLINE-HOT-NEXT: Getting callee context for instr:   %call5.i = tail call i32 @_Z3fibi
 ; INLINE-HOT-NEXT: Getting base profile for function: _Z8funcLeafi
 ; INLINE-HOT-NEXT:   Merging context profile into base profile: _Z8funcLeafi
 
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
index adda7022047d..8d4e23829941 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
@@ -4,19 +4,18 @@
 ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t
 
 ; Note that we need new pass manager to enable top-down processing for sample profile loader
-; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile
+; Test we inlined the following in top-down order and entry counts accurately reflect the post-inline base profile
 ;   main:3 @ _Z5funcAi
 ;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
 ;   _Z5funcBi:1 @ _Z8funcLeafi
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
-
-; Testwe we inlined the following in top-down order and entry counts accurate reflects post-inline base profile
-;   main:3 @ _Z5funcAi
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-ALL
+;
+; Test we inlined the following in top-down order and entry counts accurately reflect the post-inline base profile
 ;   _Z5funcAi:1 @ _Z8funcLeafi
 ;   _Z5funcBi:1 @ _Z8funcLeafi
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -profile-sample-accurate -S | FileCheck %s --check-prefix=INLINE-HOT
 
 
 @factor = dso_local global i32 3, align 4, !dbg !0
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index a5033a0dc190..d47359fa0b5f 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -1,8 +1,8 @@
-; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-inline.prof -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s
 ; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml
 
 ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-inline.prof -o %t2
-; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-file=%t2 -S -pass-remarks=sample-profile -sample-profile-prioritized-inline=0 -pass-remarks-output=%t2.opt.yaml 2>&1 | FileCheck %s
 ; RUN: FileCheck %s -check-prefix=YAML < %t2.opt.yaml
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

From c2f3f45b5c5bd6f9b86a766fc40130b34acb8293 Mon Sep 17 00:00:00 2001
From: Wenlei He <aktoon@gmail.com>
Date: Tue, 19 Jan 2021 23:29:14 -0800
Subject: [PATCH 056/244] [CSSPGO] Factor out common part for CSSPGO inline and
 AFDO inline

Refactor SampleProfileLoader::inlineHotFunctions to use the helpers from CSSPGO inlining and reduce duplicated code in the inlining loop, plus minor cleanup for the AFDO path.

This is a resubmit of D95024, with the build break and the over-tight assertion fixed.

Test Plan:

(cherry picked from commit 1645f465be85223e9f5b6303a3e5e0e491fd819f)
---
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 205 +++++++-----------
 .../SampleProfile/pseudo-probe-inline.ll      |   2 +-
 llvm/test/Transforms/SampleProfile/remarks.ll |   4 +-
 3 files changed, 80 insertions(+), 131 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 665c4078f3ee..2cfefd3a18ea 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -416,20 +416,18 @@ class SampleProfileLoader {
   findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
   mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
-  CallBase *tryPromoteIndirectCall(Function &F, StringRef CalleeName,
-                                   uint64_t &Sum, uint64_t Count, CallBase *I,
-                                   const char *&Reason);
-  bool inlineCallInstruction(CallBase &CB,
-                             const FunctionSamples *CalleeSamples);
+  // Attempt to promote indirect call and also inline the promoted call
+  bool tryPromoteAndInlineCandidate(
+      Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+      DenseSet<Instruction *> &PromotedInsns,
+      SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
-  // Helper functions call-site prioritized BFS inliner
-  // Will change the main FDO inliner to be work list based directly in
-  // upstream, then merge this change with that and remove the duplication.
   InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
   bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
-  bool tryInlineCandidate(InlineCandidate &Candidate,
-                          SmallVector<CallBase *, 8> &InlinedCallSites);
+  bool
+  tryInlineCandidate(InlineCandidate &Candidate,
+                     SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool
   inlineHotFunctionsWithPriority(Function &F,
                                  DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@@ -1077,70 +1075,46 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
   return it.first->second;
 }
 
-CallBase *
-SampleProfileLoader::tryPromoteIndirectCall(Function &F, StringRef CalleeName,
-                                            uint64_t &Sum, uint64_t Count,
-                                            CallBase *I, const char *&Reason) {
-  Reason = "Callee function not available";
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F  Caller function.
+/// \param Candidate  ICP and inline candidate.
+/// \param Sum  Sum of target counts for indirect call.
+/// \param PromotedInsns  Set to keep track of indirect calls already promoted.
+/// \param InlinedCallSite  Optional output vector for new call sites exposed
+/// after inlining. Pass nullptr (the default) when they are not needed.
+/// \return true if the call was promoted and the promoted call was inlined.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+    Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+    DenseSet<Instruction *> &PromotedInsns,
+    SmallVector<CallBase *, 8> *InlinedCallSite) {
+  const char *Reason = "Callee function not available";
   // R->getValue() != &F is to prevent promoting a recursive call.
   // If it is a recursive call, we do not inline it as it could bloat
   // the code exponentially. There is way to better handle this, e.g.
   // clone the caller first, and inline the cloned caller if it is
   // recursive. As llvm does not inline recursive calls, we will
   // simply ignore it instead of handling it explicitly.
-  auto R = SymbolMap.find(CalleeName);
+  auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
   if (R != SymbolMap.end() && R->getValue() &&
       !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
       R->getValue()->hasFnAttribute("use-sample-profile") &&
-      R->getValue() != &F && isLegalToPromote(*I, R->getValue(), &Reason)) {
+      R->getValue() != &F &&
+      isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
     auto *DI =
-        &pgo::promoteIndirectCall(*I, R->getValue(), Count, Sum, false, ORE);
-    Sum -= Count;
-    return DI;
-  }
-  return nullptr;
-}
-
-bool SampleProfileLoader::inlineCallInstruction(
-    CallBase &CB, const FunctionSamples *CalleeSamples) {
-  if (ExternalInlineAdvisor) {
-    auto Advice = ExternalInlineAdvisor->getAdvice(CB);
-    if (!Advice->isInliningRecommended()) {
-      Advice->recordUnattemptedInlining();
-      return false;
+        &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+                                  Candidate.CallsiteCount, Sum, false, ORE);
+    if (DI) {
+      Sum -= Candidate.CallsiteCount;
+      PromotedInsns.insert(Candidate.CallInstr);
+      Candidate.CallInstr = DI;
+      if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
+        return tryInlineCandidate(Candidate, InlinedCallSite);
     }
-    // Dummy record, we don't use it for replay.
-    Advice->recordInlining();
-  }
-
-  Function *CalledFunction = CB.getCalledFunction();
-  assert(CalledFunction);
-  DebugLoc DLoc = CB.getDebugLoc();
-  BasicBlock *BB = CB.getParent();
-  InlineParams Params = getInlineParams();
-  Params.ComputeFullInlineCost = true;
-  // Checks if there is anything in the reachable portion of the callee at
-  // this callsite that makes this inlining potentially illegal. Need to
-  // set ComputeFullInlineCost, otherwise getInlineCost may return early
-  // when cost exceeds threshold without checking all IRs in the callee.
-  // The acutal cost does not matter because we only checks isNever() to
-  // see if it is legal to inline the callsite.
-  InlineCost Cost =
-      getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
-  if (Cost.isNever()) {
-    ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
-              << "incompatible inlining");
-    return false;
-  }
-  InlineFunctionInfo IFI(nullptr, GetAC);
-  if (InlineFunction(CB, IFI).isSuccess()) {
-    // The call to InlineFunction erases I, so we can't pass it here.
-    emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
-                    true, CSINLINE_DEBUG);
-    if (ProfileIsCS)
-      ContextTracker->markContextSamplesInlined(CalleeSamples);
-    ++NumCSInlined;
-    return true;
+  } else {
+    LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+                      << Candidate.CalleeSamples->getFuncName() << " because "
+                      << Reason << "\n");
   }
   return false;
 }
@@ -1206,10 +1180,11 @@ bool SampleProfileLoader::inlineHotFunctions(
          "ProfAccForSymsInList should be false when profile-sample-accurate "
          "is enabled");
 
-  DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
+  DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
   bool Changed = false;
-  while (true) {
-    bool LocalChanged = false;
+  bool LocalChanged = true;
+  while (LocalChanged) {
+    LocalChanged = false;
     SmallVector<CallBase *, 10> CIS;
     for (auto &BB : F) {
       bool Hot = false;
@@ -1223,7 +1198,7 @@ bool SampleProfileLoader::inlineHotFunctions(
                    "GUIDToFuncNameMap has to be populated");
             AllCandidates.push_back(CB);
             if (FS->getEntrySamples() > 0 || ProfileIsCS)
-              localNotInlinedCallSites.try_emplace(CB, FS);
+              LocalNotInlinedCallSites.try_emplace(CB, FS);
             if (callsiteIsHot(FS, PSI))
               Hot = true;
             else if (shouldInlineColdCallee(*CB))
@@ -1241,6 +1216,11 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
     for (CallBase *I : CIS) {
       Function *CalledFunction = I->getCalledFunction();
+      InlineCandidate Candidate = {I,
+                                   LocalNotInlinedCallSites.count(I)
+                                       ? LocalNotInlinedCallSites[I]
+                                       : nullptr,
+                                   0 /* dummy count */};
       // Do not inline recursive calls.
       if (CalledFunction == &F)
         continue;
@@ -1257,30 +1237,16 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          const char *Reason = nullptr;
-          auto CalleeFunctionName = FS->getFuncName();
-          if (CallBase *DI =
-                  tryPromoteIndirectCall(F, CalleeFunctionName, Sum,
-                                         FS->getEntrySamples(), I, Reason)) {
-            PromotedInsns.insert(I);
-            // If profile mismatches, we should not attempt to inline DI.
-            if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-                inlineCallInstruction(cast<CallBase>(*DI), FS)) {
-              localNotInlinedCallSites.erase(I);
-              LocalChanged = true;
-            }
-          } else {
-            LLVM_DEBUG(dbgs()
-                       << "\nFailed to promote indirect call to "
-                       << CalleeFunctionName << " because " << Reason << "\n");
+          Candidate = {I, FS, FS->getEntrySamples()};
+          if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+            LocalNotInlinedCallSites.erase(I);
+            LocalChanged = true;
           }
         }
       } else if (CalledFunction && CalledFunction->getSubprogram() &&
                  !CalledFunction->isDeclaration()) {
-        if (inlineCallInstruction(*I, localNotInlinedCallSites.count(I)
-                                          ? localNotInlinedCallSites[I]
-                                          : nullptr)) {
-          localNotInlinedCallSites.erase(I);
+        if (tryInlineCandidate(Candidate)) {
+          LocalNotInlinedCallSites.erase(I);
           LocalChanged = true;
         }
       } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@@ -1288,11 +1254,7 @@ bool SampleProfileLoader::inlineHotFunctions(
             InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
       }
     }
-    if (LocalChanged) {
-      Changed = true;
-    } else {
-      break;
-    }
+    Changed |= LocalChanged;
   }
 
   // For CS profile, profile for not inlined context will be merged when
@@ -1301,7 +1263,7 @@ bool SampleProfileLoader::inlineHotFunctions(
     return Changed;
 
   // Accumulate not inlined callsite information into notInlinedSamples
-  for (const auto &Pair : localNotInlinedCallSites) {
+  for (const auto &Pair : LocalNotInlinedCallSites) {
     CallBase *I = Pair.getFirst();
     Function *Callee = I->getCalledFunction();
     if (!Callee || Callee->isDeclaration())
@@ -1347,7 +1309,7 @@ bool SampleProfileLoader::inlineHotFunctions(
 }
 
 bool SampleProfileLoader::tryInlineCandidate(
-    InlineCandidate &Candidate, SmallVector<CallBase *, 8> &InlinedCallSites) {
+    InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
 
   CallBase &CB = *Candidate.CallInstr;
   Function *CalledFunction = CB.getCalledFunction();
@@ -1372,9 +1334,11 @@ bool SampleProfileLoader::tryInlineCandidate(
                     true, CSINLINE_DEBUG);
 
     // Now populate the list of newly exposed call sites.
-    InlinedCallSites.clear();
-    for (auto &I : IFI.InlinedCallSites)
-      InlinedCallSites.push_back(I);
+    if (InlinedCallSites) {
+      InlinedCallSites->clear();
+      for (auto &I : IFI.InlinedCallSites)
+        InlinedCallSites->push_back(I);
+    }
 
     if (ProfileIsCS)
       ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
@@ -1409,8 +1373,6 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
 
 InlineCost
 SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
-  assert(ProfileIsCS && "Prioritiy based inliner only works with CSSPGO now");
-
   std::unique_ptr<InlineAdvice> Advice = nullptr;
   if (ExternalInlineAdvisor) {
     Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
@@ -1446,18 +1408,16 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
   InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
                                   GetTTI(*Callee), GetAC, GetTLI);
 
+  // Honor always inline and never inline from call analyzer
+  if (Cost.isNever() || Cost.isAlways())
+    return Cost;
+
   // For old FDO inliner, we inline the call site as long as cost is not
   // "Never". The cost-benefit check is done earlier.
   if (!CallsitePrioritizedInline) {
-    if (Cost.isNever())
-      return Cost;
-    return InlineCost::getAlways("hot callsite previously inlined");
+    return InlineCost::get(Cost.getCost(), INT_MAX);
   }
 
-  // Honor always inline and never inline from call analyzer
-  if (Cost.isNever() || Cost.isAlways())
-    return Cost;
-
   // Otherwise only use the cost from call analyzer, but overwite threshold with
   // Sample PGO threshold.
   return InlineCost::get(Cost.getCost(), SampleThreshold);
@@ -1542,34 +1502,23 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
         // fixed, but we generate different types).
         if (!PSI->isHotCount(EntryCountDistributed))
           break;
-        const char *Reason = nullptr;
-        auto CalleeFunctionName = FS->getFuncName();
-        if (CallBase *DI = tryPromoteIndirectCall(
-                F, CalleeFunctionName, Sum, EntryCountDistributed, I, Reason)) {
-          // Attach function profile for promoted indirect callee, and update
-          // call site count for the promoted inline candidate too.
-          Candidate = {DI, FS, EntryCountDistributed};
-          PromotedInsns.insert(I);
-          SmallVector<CallBase *, 8> InlinedCallSites;
-          // If profile mismatches, we should not attempt to inline DI.
-          if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
-              tryInlineCandidate(Candidate, InlinedCallSites)) {
-            for (auto *CB : InlinedCallSites) {
-              if (getInlineCandidate(&NewCandidate, CB))
-                CQueue.emplace(NewCandidate);
-            }
-            Changed = true;
+        SmallVector<CallBase *, 8> InlinedCallSites;
+        // Attach function profile for promoted indirect callee, and update
+        // call site count for the promoted inline candidate too.
+        Candidate = {I, FS, EntryCountDistributed};
+        if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
+                                         &InlinedCallSites)) {
+          for (auto *CB : InlinedCallSites) {
+            if (getInlineCandidate(&NewCandidate, CB))
+              CQueue.emplace(NewCandidate);
           }
-        } else {
-          LLVM_DEBUG(dbgs()
-                     << "\nFailed to promote indirect call to "
-                     << CalleeFunctionName << " because " << Reason << "\n");
+          Changed = true;
         }
       }
     } else if (CalledFunction && CalledFunction->getSubprogram() &&
                !CalledFunction->isDeclaration()) {
       SmallVector<CallBase *, 8> InlinedCallSites;
-      if (tryInlineCandidate(Candidate, InlinedCallSites)) {
+      if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
         for (auto *CB : InlinedCallSites) {
           if (getInlineCandidate(&NewCandidate, CB))
             CQueue.emplace(NewCandidate);
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index d47359fa0b5f..5359fd4da067 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -89,7 +89,7 @@ if.end:
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '15'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '225'
+;YAML-NEXT:    - Threshold:       '2147483647'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          foo
diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll
index 3add1e74abaa..46f016433b20 100644
--- a/llvm/test/Transforms/SampleProfile/remarks.ll
+++ b/llvm/test/Transforms/SampleProfile/remarks.ll
@@ -21,7 +21,7 @@
 
 ; We are expecting foo() to be inlined in main() (almost all the cycles are
 ; spent inside foo).
-; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=225) at callsite main:0:21;
+; CHECK: remark: remarks.cc:13:21: _Z3foov inlined into main to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21;
 ; CHECK: remark: remarks.cc:9:19: rand inlined into main to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21;
 
 ; The back edge for the loop is the hottest edge in the loop subgraph.
@@ -47,7 +47,7 @@
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '130'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '225'
+;YAML-NEXT:    - Threshold:       '2147483647'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          main

From a9157c5628dc89b13936bbc8eef261cb02d63d40 Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Fri, 11 Dec 2020 12:18:31 -0800
Subject: [PATCH 057/244] [CSSPGO] Introducing distribution factor for pseudo
 probe.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sample re-annotation is required in LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. This is mainly caused by preLTO code duplication that is done by passes such as loop unrolling, jump threading, indirect call promotion etc, where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we are introducing a concept of distribution factor for pseudo probes so that samples can be distributed for duplicated probes scaled by a factor. We hope that optimizations duplicating code well-maintain the branch frequency information (BFI) based on which probe distribution factors are calculated. Distribution factors are updated at the end of preLTO pipeline to reflect an estimated portion of the real execution count.

This change also introduces a pseudo probe verifier that can be run after each IR pass to detect duplicated pseudo probes.

A saturated distribution factor stands for 1.0. A pseudo probe will carry a factor with a value ranging from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated with each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit DWARF discriminator. A 7-bit distribution factor is used instead.

Changes are also needed to the sample profile inliner to deal with prorated callsite counts. Call sites duplicated by PreLTO passes, when later inlined at LTO time, should have the callee's probes prorated based on the prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, indirect call promotion results in multiple callsites. The original samples should be distributed across them. This is fixed by adjusting the callsites' distribution factors.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D93264

(cherry picked from commit 3d89b3cbec230633e8228787819b15116c1a1730)
---
 clang/test/CodeGen/pseudo-probe-emit.c        |   8 +-
 llvm/include/llvm/IR/IntrinsicInst.h          |   8 +-
 llvm/include/llvm/IR/Intrinsics.td            |   2 +-
 llvm/include/llvm/IR/PseudoProbe.h            |  27 ++-
 .../llvm/Passes/StandardInstrumentations.h    |   2 +
 llvm/include/llvm/ProfileData/SampleProf.h    |  10 ++
 .../llvm/Transforms/IPO/SampleProfileProbe.h  |  41 +++++
 llvm/lib/IR/PseudoProbe.cpp                   |  41 +++++
 llvm/lib/Passes/PassBuilder.cpp               |   6 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Passes/StandardInstrumentations.cpp  |   1 +
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 108 +++++++++---
 .../lib/Transforms/IPO/SampleProfileProbe.cpp | 162 +++++++++++++++++-
 .../Inputs/pseudo-probe-update.prof           |   8 +
 .../SampleProfile/pseudo-probe-emit-inline.ll |  20 +--
 .../SampleProfile/pseudo-probe-emit.ll        |  22 ++-
 .../SampleProfile/pseudo-probe-inline.ll      |  38 ++--
 .../SampleProfile/pseudo-probe-profile.ll     |  42 ++++-
 .../SampleProfile/pseudo-probe-update.ll      |  45 +++++
 .../SampleProfile/pseudo-probe-verify.ll      |  77 +++++++++
 20 files changed, 595 insertions(+), 74 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll

diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c
index 059673b6992e..fccc8f04844d 100644
--- a/clang/test/CodeGen/pseudo-probe-emit.c
+++ b/clang/test/CodeGen/pseudo-probe-emit.c
@@ -6,12 +6,12 @@ void bar();
 void go();
 
 void foo(int x) {
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   if (x == 0)
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
     bar();
   else
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
     go();
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
 }
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 9d68f3fdde6c..df3a1d568756 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -981,12 +981,16 @@ class PseudoProbeInst : public IntrinsicInst {
     return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0)));
   }
 
+  ConstantInt *getIndex() const {
+    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+  }
+
   ConstantInt *getAttributes() const {
     return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
   }
 
-  ConstantInt *getIndex() const {
-    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+  ConstantInt *getFactor() const {
+    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
   }
 };
 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b2bfc6e6f9e6..21307ed1bd91 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1298,7 +1298,7 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int
 // Like the sideeffect intrinsic defined above, this intrinsic is treated by the 
 // optimizer as having opaque side effects so that it won't be get rid of or moved 
 // out of the block it probes.
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
                                     [IntrInaccessibleMemOnly, IntrWillReturn]>;
 
 // Intrinsics to support half precision floating point format
diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index e0370c264102..5165e80caa2d 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -16,28 +16,39 @@
 #include "llvm/ADT/Optional.h"
 #include <cassert>
 #include <cstdint>
+#include <limits>
 
 namespace llvm {
 
 class Instruction;
+class BasicBlock;
 
 constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
 
 enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
 
+// The saturated distribution factor representing 100% for block probes.
+constexpr static uint64_t PseudoProbeFullDistributionFactor =
+    std::numeric_limits<uint64_t>::max();
+
 struct PseudoProbeDwarfDiscriminator {
+public:
   // The following APIs encodes/decodes per-probe information to/from a
   // 32-bit integer which is organized as:
   //  [2:0] - 0x7, this is reserved for regular discriminator,
   //          see DWARF discriminator encoding rule
   //  [18:3] - probe id
-  //  [25:19] - reserved
+  //  [25:19] - probe distribution factor
   //  [28:26] - probe type, see PseudoProbeType
   //  [31:29] - reserved for probe attributes
-  static uint32_t packProbeData(uint32_t Index, uint32_t Type) {
+  static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags,
+                                uint32_t Factor) {
     assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16");
     assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7");
-    return (Index << 3) | (Type << 26) | 0x7;
+    assert(Flags <= 0x7);
+    assert(Factor <= 100 &&
+           "Probe distribution factor too big to encode, exceeding 100");
+    return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7;
   }
 
   static uint32_t extractProbeIndex(uint32_t Value) {
@@ -51,16 +62,26 @@ struct PseudoProbeDwarfDiscriminator {
   static uint32_t extractProbeAttributes(uint32_t Value) {
     return (Value >> 29) & 0x7;
   }
+
+  static uint32_t extractProbeFactor(uint32_t Value) {
+    return (Value >> 19) & 0x7F;
+  }
+
+  // The saturated distribution factor representing 100% for callsites.
+  constexpr static uint8_t FullDistributionFactor = 100;
 };
 
 struct PseudoProbe {
   uint32_t Id;
   uint32_t Type;
   uint32_t Attr;
+  float Factor;
 };
 
 Optional<PseudoProbe> extractProbe(const Instruction &Inst);
 
+void setProbeDistributionFactor(Instruction &Inst, float Factor);
+
 } // end namespace llvm
 
 #endif // LLVM_IR_PSEUDOPROBE_H
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 795a980878e2..61c86b0468f2 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -22,6 +22,7 @@
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
 
 #include <string>
 #include <utility>
@@ -273,6 +274,7 @@ class StandardInstrumentations {
   OptBisectInstrumentation OptBisect;
   PreservedCFGCheckerInstrumentation PreservedCFGChecker;
   IRChangedPrinter PrintChangedIR;
+  PseudoProbeVerifier PseudoProbeVerification;
   VerifyInstrumentation Verify;
 
   bool VerifyEach;
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 346bc4c81d86..25d5b2376c11 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -347,6 +347,16 @@ class SampleRecord {
     return SortedTargets;
   }
 
+  /// Prorate call targets by a distribution factor.
+  static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
+                                               float DistributionFactor) {
+    CallTargetMap AdjustedTargets;
+    for (const auto &I : Targets) {
+      AdjustedTargets[I.first()] = I.second * DistributionFactor;
+    }
+    return AdjustedTargets;
+  }
+
   /// Merge the samples in \p Other into this record.
   /// Optionally scale sample counts by \p Weight.
   sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 78117fd4a9c2..cab893b50d19 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -16,6 +16,10 @@
 #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassInstrumentation.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PseudoProbe.h"
 #include "llvm/ProfileData/SampleProf.h"
@@ -29,6 +33,8 @@ class Module;
 using namespace sampleprof;
 using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
 using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
+using ProbeFactorMap = std::unordered_map<uint64_t, float>;
+using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
 
 enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
 
@@ -43,6 +49,33 @@ class PseudoProbeDescriptor {
   uint64_t getFunctionHash() const { return FunctionHash; }
 };
 
+// A pseudo probe verifier that can be run after each IR pass to detect the
+// violation of updating probe factors. In principle, the sum of distribution
+// factor for a probe should be identical before and after a pass. For a
+// function pass, the factor sum for a probe would be typically 100%.
+class PseudoProbeVerifier {
+public:
+  void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+  // Implementation of pass instrumentation callbacks for new pass manager.
+  void runAfterPass(StringRef PassID, Any IR);
+
+private:
+  // Allow a little bias due to the rounding to integral factors.
+  constexpr static float DistributionFactorVariance = 0.02;
+  // Distribution factors from last pass.
+  FuncProbeFactorMap FunctionProbeFactors;
+
+  void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors);
+  void runAfterPass(const Module *M);
+  void runAfterPass(const LazyCallGraph::SCC *C);
+  void runAfterPass(const Function *F);
+  void runAfterPass(const Loop *L);
+  bool shouldVerifyFunction(const Function *F);
+  void verifyProbeFactors(const Function *F,
+                          const ProbeFactorMap &ProbeFactors);
+};
+
 // This class serves sample counts correlation for SampleProfileLoader by
 // analyzing pseudo probes and their function descriptors injected by
 // SampleProfileProber.
@@ -102,5 +135,13 @@ class SampleProfileProbePass : public PassInfoMixin<SampleProfileProbePass> {
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
+class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
+  void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
+
+public:
+  PseudoProbeUpdatePass() {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
 } // end namespace llvm
 #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index 804214f06e7a..80d2963938d4 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -35,6 +35,9 @@ Optional<PseudoProbe> extractProbeFromDiscriminator(const Instruction &Inst) {
           PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
       Probe.Attr =
           PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
+      Probe.Factor =
+          PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
+          (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
       return Probe;
     }
   }
@@ -47,6 +50,8 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
     Probe.Id = II->getIndex()->getZExtValue();
     Probe.Type = (uint32_t)PseudoProbeType::Block;
     Probe.Attr = II->getAttributes()->getZExtValue();
+    Probe.Factor = II->getFactor()->getZExtValue() /
+                   (float)PseudoProbeFullDistributionFactor;
     return Probe;
   }
 
@@ -55,4 +60,40 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
 
   return None;
 }
+
+void setProbeDistributionFactor(Instruction &Inst, float Factor) {
+  assert(Factor >= 0 && Factor <= 1 &&
+         "Distribution factor must be in [0, 1.0]");
+  if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+    IRBuilder<> Builder(&Inst);
+    uint64_t IntFactor = PseudoProbeFullDistributionFactor;
+    if (Factor < 1)
+      IntFactor *= Factor;
+    auto OrigFactor = II->getFactor()->getZExtValue();
+    if (IntFactor != OrigFactor)
+      II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor));
+  } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
+    if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+      const DILocation *DIL = DLoc;
+      auto Discriminator = DIL->getDiscriminator();
+      if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+        auto Index =
+            PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+        auto Type =
+            PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+        auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+            Discriminator);
+        // Round small factors to 0 to avoid over-counting.
+        uint32_t IntFactor =
+            PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+        if (Factor < 1)
+          IntFactor *= Factor;
+        uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+            Index, Type, Attr, IntFactor);
+        DIL = DIL->cloneWithDiscriminator(V);
+        Inst.setDebugLoc(DIL);
+      }
+    }
+  }
+}
 } // namespace llvm
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index d4c4c6e01ef5..6c1a7c75d30a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1423,6 +1423,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 
@@ -1477,6 +1480,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
   if (PTO.Coroutines)
     MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 860bfade733d..877cb9ed13b3 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
 MODULE_PASS("memprof-module", ModuleMemProfilerPass())
 MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
 #undef MODULE_PASS
 
 #ifndef CGSCC_ANALYSIS
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index a8bfe02d4432..6795aed7b04e 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks(
   OptBisect.registerCallbacks(PIC);
   PreservedCFGChecker.registerCallbacks(PIC);
   PrintChangedIR.registerCallbacks(PIC);
+  PseudoProbeVerification.registerCallbacks(PIC);
   if (VerifyEach)
     Verify.registerCallbacks(PIC);
 }
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2cfefd3a18ea..b2a9127773c3 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -108,6 +108,8 @@ STATISTIC(NumCSNotInlined,
 STATISTIC(NumMismatchedProfile,
           "Number of functions with CFG mismatched profile");
 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+          "Number of inlined callsites with a partial distribution factor");
 
 STATISTIC(NumCSInlinedHitMinLimit,
           "Number of functions with FDO inline stopped due to min size limit");
@@ -358,7 +360,14 @@ class GUIDToFuncNameMapper {
 struct InlineCandidate {
   CallBase *CallInstr;
   const FunctionSamples *CalleeSamples;
+  // Prorated callsite count, which will be used to guide inlining. For example,
+  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+  // copies will get their own distribution factors and their prorated counts
+  // will be used to decide if they should be inlined independently.
   uint64_t CallsiteCount;
+  // Call site distribution factor to prorate the profile samples for a
+  // duplicated callsite. Default value is 1.0.
+  float CallsiteDistribution;
 };
 
 // Inline candidate comparer using call site weight
@@ -418,8 +427,8 @@ class SampleProfileLoader {
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
   // Attempt to promote indirect call and also inline the promoted call
   bool tryPromoteAndInlineCandidate(
-      Function &F, InlineCandidate &Candidate, uint64_t &Sum,
-      DenseSet<Instruction *> &PromotedInsns,
+      Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+      uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
       SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@@ -886,7 +895,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
 
   const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
   if (R) {
-    uint64_t Samples = R.get();
+    uint64_t Samples = R.get() * Probe->Factor;
     bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
     if (FirstMark) {
       ORE->emit([&]() {
@@ -894,13 +903,17 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
         Remark << "Applied " << ore::NV("NumSamples", Samples);
         Remark << " samples from profile (ProbeId=";
         Remark << ore::NV("ProbeId", Probe->Id);
+        Remark << ", Factor=";
+        Remark << ore::NV("Factor", Probe->Factor);
+        Remark << ", OriginalSamples=";
+        Remark << ore::NV("OriginalSamples", R.get());
         Remark << ")";
         return Remark;
       });
     }
-
     LLVM_DEBUG(dbgs() << "    " << Probe->Id << ":" << Inst
-                      << " - weight: " << R.get() << ")\n");
+                      << " - weight: " << R.get() << " - factor: "
+                      << format("%0.2f", Probe->Factor) << ")\n");
     return Samples;
   }
   return R;
@@ -1085,7 +1098,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
 /// \param InlinedCallSite  Output vector for new call sites exposed after
 /// inlining.
 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
-    Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+    Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
     DenseSet<Instruction *> &PromotedInsns,
     SmallVector<CallBase *, 8> *InlinedCallSite) {
   const char *Reason = "Callee function not available";
@@ -1106,10 +1119,28 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
                                   Candidate.CallsiteCount, Sum, false, ORE);
     if (DI) {
       Sum -= Candidate.CallsiteCount;
+      // Prorate the indirect callsite distribution.
+      // Do not update the promoted direct callsite distribution at this
+      // point since the original distribution combined with the callee
+      // profile will be used to prorate callsites from the callee if
+      // inlined. Once not inlined, the direct callsite distribution should
+      // be prorated so that it will reflect the real callsite counts.
+      setProbeDistributionFactor(*Candidate.CallInstr,
+                                 Candidate.CallsiteDistribution * Sum /
+                                     SumOrigin);
       PromotedInsns.insert(Candidate.CallInstr);
       Candidate.CallInstr = DI;
-      if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
-        return tryInlineCandidate(Candidate, InlinedCallSite);
+      if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+        bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+        if (!Inlined) {
+          // Prorate the direct callsite distribution so that it reflects real
+          // callsite counts.
+          setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+                                              Candidate.CallsiteCount /
+                                              SumOrigin);
+        }
+        return Inlined;
+      }
     }
   } else {
     LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
@@ -1216,11 +1247,11 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
     for (CallBase *I : CIS) {
       Function *CalledFunction = I->getCalledFunction();
-      InlineCandidate Candidate = {I,
-                                   LocalNotInlinedCallSites.count(I)
-                                       ? LocalNotInlinedCallSites[I]
-                                       : nullptr,
-                                   0 /* dummy count */};
+      InlineCandidate Candidate = {
+          I,
+          LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+                                            : nullptr,
+          0 /* dummy count */, 1.0 /* dummy distribution factor */};
       // Do not inline recursive calls.
       if (CalledFunction == &F)
         continue;
@@ -1229,6 +1260,7 @@ bool SampleProfileLoader::inlineHotFunctions(
           continue;
         uint64_t Sum;
         for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+          uint64_t SumOrigin = Sum;
           if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
             FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
                                      PSI->getOrCompHotCountThreshold());
@@ -1237,8 +1269,9 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          Candidate = {I, FS, FS->getEntrySamples()};
-          if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+          Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+          if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                           PromotedInsns)) {
             LocalNotInlinedCallSites.erase(I);
             LocalChanged = true;
           }
@@ -1343,6 +1376,23 @@ bool SampleProfileLoader::tryInlineCandidate(
     if (ProfileIsCS)
       ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
     ++NumCSInlined;
+
+    // Prorate inlined probes for a duplicated inlining callsite which probably
+    // has a distribution less than 100%. Samples for an inlinee should be
+    // distributed among the copies of the original callsite based on each
+    // callsite's distribution factor for counts accuracy. Note that an inlined
+    // probe may come with its own distribution factor if it has been duplicated
+    // in the inlinee body. The two factors are multiplied to reflect the
+    // aggregation of duplication.
+    if (Candidate.CallsiteDistribution < 1) {
+      for (auto &I : IFI.InlinedCallSites) {
+        if (Optional<PseudoProbe> Probe = extractProbe(*I))
+          setProbeDistributionFactor(*I, Probe->Factor *
+                                             Candidate.CallsiteDistribution);
+      }
+      NumDuplicatedInlinesite++;
+    }
+
     return true;
   }
   return false;
@@ -1360,14 +1410,19 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
   if (!CalleeSamples)
     return false;
 
+  float Factor = 1.0;
+  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+    Factor = Probe->Factor;
+
   uint64_t CallsiteCount = 0;
   ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
   if (Weight)
     CallsiteCount = Weight.get();
   if (CalleeSamples)
-    CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples());
+    CallsiteCount = std::max(
+        CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
 
-  *NewCandidate = {CB, CalleeSamples, CallsiteCount};
+  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
   return true;
 }
 
@@ -1479,6 +1534,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
       uint64_t Sum;
       auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
       uint64_t SumOrigin = Sum;
+      Sum *= Candidate.CallsiteDistribution;
       for (const auto *FS : CalleeSamples) {
         // TODO: Consider disable pre-lTO ICP for MonoLTO as well
         if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@@ -1486,7 +1542,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
                                    PSI->getOrCompHotCountThreshold());
           continue;
         }
-        uint64_t EntryCountDistributed = FS->getEntrySamples();
+        uint64_t EntryCountDistributed =
+            FS->getEntrySamples() * Candidate.CallsiteDistribution;
         // In addition to regular inline cost check, we also need to make sure
         // ICP isn't introducing excessive speculative checks even if individual
         // target looks beneficial to promote and inline. That means we should
@@ -1505,9 +1562,10 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
         SmallVector<CallBase *, 8> InlinedCallSites;
         // Attach function profile for promoted indirect callee, and update
         // call site count for the promoted inline candidate too.
-        Candidate = {I, FS, EntryCountDistributed};
-        if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
-                                         &InlinedCallSites)) {
+        Candidate = {I, FS, EntryCountDistributed,
+                     Candidate.CallsiteDistribution};
+        if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                         PromotedInsns, &InlinedCallSites)) {
           for (auto *CB : InlinedCallSites) {
             if (getInlineCandidate(&NewCandidate, CB))
               CQueue.emplace(NewCandidate);
@@ -1965,6 +2023,14 @@ void SampleProfileLoader::propagateWeights(Function &F) {
           auto T = FS->findCallTargetMapAt(CallSite);
           if (!T || T.get().empty())
             continue;
+          // Prorate the callsite counts to reflect what is already done to the
+          // callsite, such as ICP or callsite cloning.
+          if (FunctionSamples::ProfileIsProbeBased) {
+            if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+              if (Probe->Factor < 1)
+                T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+            }
+          }
           SmallVector<InstrProfValueData, 2> SortedCallTargets =
               GetSortedValueDataFromCallTargets(T.get());
           uint64_t Sum;
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 7cecd20b78d8..a885c3ee4ded 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -25,8 +26,10 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
 #include <vector>
 
 using namespace llvm;
@@ -35,6 +38,115 @@ using namespace llvm;
 STATISTIC(ArtificialDbgLine,
           "Number of probes that have an artificial debug line");
 
+static cl::opt<bool>
+    VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+                      cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+    "verify-pseudo-probe-funcs", cl::Hidden,
+    cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+    UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+                      cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+  // Skip function declaration.
+  if (F->isDeclaration())
+    return false;
+  // Skip function that will not be emitted into object file. The prevailing
+  // definition will be verified instead.
+  if (F->hasAvailableExternallyLinkage())
+    return false;
+  // Do a name matching.
+  static std::unordered_set<std::string> VerifyFuncNames(
+      VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+  return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+  if (VerifyPseudoProbe) {
+    PIC.registerAfterPassCallback(
+        [this](StringRef P, Any IR, const PreservedAnalyses &) {
+          this->runAfterPass(P, IR);
+        });
+  }
+}
+
+// Callback to run after each transformation for the new pass manager.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+  std::string Banner =
+      "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+  dbgs() << Banner;
+  if (any_isa<const Module *>(IR))
+    runAfterPass(any_cast<const Module *>(IR));
+  else if (any_isa<const Function *>(IR))
+    runAfterPass(any_cast<const Function *>(IR));
+  else if (any_isa<const LazyCallGraph::SCC *>(IR))
+    runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+  else if (any_isa<const Loop *>(IR))
+    runAfterPass(any_cast<const Loop *>(IR));
+  else
+    llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+  for (const Function &F : *M)
+    runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+  for (const LazyCallGraph::Node &N : *C)
+    runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+  if (!shouldVerifyFunction(F))
+    return;
+  ProbeFactorMap ProbeFactors;
+  for (const auto &BB : *F)
+    collectProbeFactors(&BB, ProbeFactors);
+  verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+  const Function *F = L->getHeader()->getParent();
+  runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+                                              ProbeFactorMap &ProbeFactors) {
+  for (const auto &I : *Block) {
+    if (Optional<PseudoProbe> Probe = extractProbe(I))
+      ProbeFactors[Probe->Id] += Probe->Factor;
+  }
+}
+
+void PseudoProbeVerifier::verifyProbeFactors(
+    const Function *F, const ProbeFactorMap &ProbeFactors) {
+  bool BannerPrinted = false;
+  auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
+  for (const auto &I : ProbeFactors) {
+    float CurProbeFactor = I.second;
+    if (PrevProbeFactors.count(I.first)) {
+      float PrevProbeFactor = PrevProbeFactors[I.first];
+      if (std::abs(CurProbeFactor - PrevProbeFactor) >
+          DistributionFactorVariance) {
+        if (!BannerPrinted) {
+          dbgs() << "Function " << F->getName() << ":\n";
+          BannerPrinted = true;
+        }
+        dbgs() << "Probe " << I.first << "\tprevious factor "
+               << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
+               << format("%0.2f", CurProbeFactor) << "\n";
+      }
+    }
+
+    // Update
+    PrevProbeFactors[I.first] = I.second;
+  }
+}
+
 PseudoProbeManager::PseudoProbeManager(const Module &M) {
   if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
     for (const auto *Operand : FuncInfo->operands()) {
@@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
     Function *ProbeFn =
         llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
-                     Builder.getInt32(0)};
+                     Builder.getInt32(0),
+                     Builder.getInt64(PseudoProbeFullDistributionFactor)};
     auto *Probe = Builder.CreateCall(ProbeFn, Args);
     AssignDebugLoc(Probe);
   }
@@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
     // Levarge the 32-bit discriminator field of debug data to store the ID and
     // type of a callsite probe. This gets rid of the dependency on plumbing a
     // customized metadata through the codegen pipeline.
-    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+        Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
     if (auto DIL = Call->getDebugLoc()) {
       DIL = DIL->cloneWithDiscriminator(V);
       Call->setDebugLoc(DIL);
@@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M,
 
   return PreservedAnalyses::none();
 }
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto BBProfileCount = [&BFI](BasicBlock *BB) {
+    return BFI.getBlockProfileCount(BB)
+               ? BFI.getBlockProfileCount(BB).getValue()
+               : 0;
+  };
+
+  // Collect the sum of execution weight for each probe.
+  ProbeFactorMap ProbeFactors;
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I))
+        ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+    }
+  }
+
+  // Fix up over-counted probes.
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+        float Sum = ProbeFactors[Probe->Id];
+        if (Sum != 0)
+          setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+      }
+    }
+  }
+}
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+  if (UpdatePseudoProbe) {
+    for (auto &F : M) {
+      if (F.isDeclaration())
+        continue;
+      FunctionAnalysisManager &FAM =
+          AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+      runOnFunction(F, FAM);
+    }
+  }
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
new file mode 100644
index 000000000000..62f9bd5992e7
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
@@ -0,0 +1,8 @@
+foo:3200:13
+ 1: 13
+ 2: 7
+ 3: 6
+ 4: 13
+ 5: 7
+ 6: 6
+ !CFGChecksum: 844530426352218
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
index 7e3c7e8deda2..4f730ba09a3a 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
@@ -11,14 +11,14 @@
 ; RUN: llvm-objdump --section-headers  %t4 | FileCheck %s --check-prefix=CHECK-OBJ
 
 define dso_local void @foo2() !dbg !7 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID1:]] 1 0 0
   ret void, !dbg !10
 }
 
 define dso_local void @foo() #0 !dbg !11 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID2]]:2
   call void @foo2(), !dbg !12
@@ -26,9 +26,9 @@ define dso_local void @foo() #0 !dbg !11 {
 }
 
 define dso_local i32 @entry() !dbg !14 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID3:]] 1 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID2]] 1 0 0 @ [[#GUID3]]:2
 ; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2
@@ -41,13 +41,13 @@ define dso_local i32 @entry() !dbg !14 {
 ; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo"
 ; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]])
 ; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]])
-;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe
-;; with an index of 2.
-; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751)
+;; A discriminator of 186646551 which is 0xb200017 in hexadecimal, stands for a direct call probe
+;; with an index of 2 and a scale of 100%.
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551)
 ; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry"
 ; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3,  scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]])
 ; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]])
-; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751)
+; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551)
 ; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]])
 ; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3,  scope: ![[#BL1]], inlinedAt: ![[#INL2]])
 
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
index 2074b708380f..da5d46a32287 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
@@ -11,32 +11,36 @@
 
 ;; Check the generation of pseudoprobe intrinsic call.
 
+@a = dso_local global i32 0, align 4
+
 define void @foo(i32 %x) !dbg !3 {
 bb0:
   %cmp = icmp eq i32 %x, 0
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID:]] 1 0 0
   br i1 %cmp, label %bb1, label %bb2
 
 bb1:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 3 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
+  store i32 6, i32* @a, align 4
   br label %bb3
 
 bb2:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 2 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
+  store i32 8, i32* @a, align 4
   br label %bb3
 
 bb3:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]]
   ret void, !dbg !12
 }
 
@@ -44,7 +48,7 @@ declare void @bar(i32 %x)
 
 define internal void @foo2(void (i32)* %f) !dbg !4 {
 entry:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
 ; Check pseudo_probe metadata attached to the indirect call instruction.
@@ -64,13 +68,13 @@ entry:
 ; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]])
 ; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]])
 ; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]])
-;; A discriminator of 67108887 which is 0x4000017 in hexdecimal, stands for a direct call probe
+;; A discriminator of 119537687 which is 0x7200017 in hexadecimal, stands for a direct call probe
 ;; with an index of 2.
-; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887)
+; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687)
 ; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 134217759 which is 0x800001f in hexdecimal, stands for a direct call probe
+;; A discriminator of 186646559 which is 0xb20001f in hexadecimal, stands for a direct call probe
 ;; with an index of 3.
-; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759)
+; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559)
 
 ; Check the generation of .pseudo_probe_desc section
 ; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index 5359fd4da067..055d41792290 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -12,18 +12,18 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define dso_local i32 @foo(i32 %x) #0 !dbg !12 {
 entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1)
   %add = add nsw i32 %x, 100000, !dbg !19
 ;; Check zen is fully inlined so there's no call to zen anymore.
 ;; Check code from the inlining of zen is properly annotated here.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
 ; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
 ; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
 ; CHECK-NOT: call i32 @zen
   %call = call i32 @zen(i32 %add), !dbg !20
   ret i32 %call, !dbg !21
@@ -32,36 +32,36 @@ entry:
 ; CHECK: define dso_local i32 @zen
 define dso_local i32 @zen(i32 %x) #0 !dbg !22 {
 entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1)
   %cmp = icmp sgt i32 %x, 0, !dbg !26
   br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28
 
 while.cond:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
   %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ]
   %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29
   br i1 %cmp1, label %while.body, label %if.end, !dbg !31
 
 while.body:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
   %0 = load volatile i32, i32* @factor, align 4, !dbg !32
   %sub = sub nsw i32 %x.addr.0, %0, !dbg !39
   br label %while.cond, !dbg !31
 
 while.cond2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
   %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ]
   %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42
   br i1 %cmp3, label %while.body4, label %if.end, !dbg !44
 
 while.body4:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
   %1 = load volatile i32, i32* @factor, align 4, !dbg !45
   %add = add nsw i32 %x.addr.1, %1, !dbg !48
   br label %while.cond2, !dbg !44
 
 if.end:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
   %x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ]
   ret i32 %x.addr.2, !dbg !51
 }
@@ -109,6 +109,10 @@ if.end:
 ;YAML-NEXT:    - NumSamples:      '23'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '23'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 ;YAML:  --- !Analysis
@@ -121,6 +125,10 @@ if.end:
 ;YAML-NEXT:    - NumSamples:      '23'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '23'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 ;YAML:  --- !Analysis
@@ -133,6 +141,10 @@ if.end:
 ;YAML-NEXT:    - NumSamples:      '382920'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '2'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '382920'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
index 25fd04e9d710..34629a3743eb 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
@@ -8,26 +8,26 @@ entry:
   store i32 %x, i32* %x.addr, align 4
   %0 = load i32, i32* %x.addr, align 4
   %cmp = icmp eq i32 %0, 0
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   br i1 %cmp, label %if.then, label %if.else
   ; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]]
 
 if.then:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]]
   call void %f(i32 1)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
   store i32 1, i32* %retval, align 4
   br label %return
 
 if.else:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
   call void %f(i32 2)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
   store i32 2, i32* %retval, align 4
   br label %return
 
 return:
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
   %1 = load i32, i32* %retval, align 4
   ret i32 %1
 }
@@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"}
 
 ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
 ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 119537711 which is 0x400002f in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537711 which is 0x720002f in hexadecimal, stands for an indirect call probe
 ;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911)
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
 ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-;; A discriminator of 119537719 which is 0x4000037 in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537719 which is 0x7200037 in hexadecimal, stands for an indirect call probe
 ;; with an index of 6.
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919)
+; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
 ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
 
 !llvm.module.flags = !{!9, !10}
@@ -69,6 +69,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '13'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -80,6 +84,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '5'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '7'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -91,6 +99,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '2'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '7'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -102,6 +114,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '6'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '6'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -113,6 +129,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '3'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '6'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -124,4 +144,8 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '4'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '13'
 ;YAML-NEXT:    - String:          ')'
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
new file mode 100644
index 000000000000..992afedd14f7
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S  | FileCheck %s
+
+declare i32 @f1()
+declare i32 @f2()
+declare void @f3()
+
+
+;; This tests that the branch in 'merge' can be cloned up into T1.
+define i32 @foo(i1 %cond, i1 %cond2) #0 {
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+	br i1 %cond, label %T1, label %F1
+T1:
+; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]]
+	%v1 = call i32 @f1()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+    %cond3 = icmp eq i32 %v1, 412
+	br label %Merge
+F1:
+; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
+	%v2 = call i32 @f2()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+	br label %Merge
+Merge:
+
+	%A = phi i1 [%cond3, %T1], [%cond2, %F1]
+	%B = phi i32 [%v1, %T1], [%v2, %F1]
+	br i1 %A, label %T2, label %F2
+T2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+	call void @f3()
+	ret i32 %B
+F2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+	ret i32 %B
+}
+
+; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7}
+; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
+
+attributes #0 = {"use-sample-profile"}
+
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
new file mode 100644
index 000000000000..fd57dd8bc526
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
@@ -0,0 +1,77 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY
+; RUN: FileCheck %s < %t
+
+; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
+; VERIFY: Function foo:
+; VERIFY-DAG: Probe 6	previous factor 1.00	current factor 5.00
+; VERIFY-DAG: Probe 4	previous factor 1.00	current factor 5.00
+
+declare void @foo2() nounwind
+
+define void @foo(i32 %x) {
+bb:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+  %tmp = alloca [5 x i32*], align 16
+  br label %bb7.preheader
+
+bb3.loopexit:
+  %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ]
+  %tmp5.not = icmp eq i32 %spec.select.lcssa, 0
+  br i1 %tmp5.not, label %bb24, label %bb7.preheader
+
+bb7.preheader:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+  %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ]
+  br label %bb10
+
+bb10:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+  %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
+  %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
+  %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv
+  %tmp14 = load i32*, i32** %tmp13, align 8
+  %tmp15.not = icmp ne i32* %tmp14, null
+  %tmp18 = sext i1 %tmp15.not to i32
+  %spec.select = add nsw i32 %tmp1.14, %tmp18
+  call void @foo2(), !dbg !12
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 5
+  br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
+
+bb24:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+  ret void
+}
+
+;; A discriminator of 186646583 which is 0xb200037 in hexadecimal, stands for a direct call probe
+;; with an index of 6 and a scale of -1%.
+; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.9.0"}
+!12 = !DILocation(line: 2, column: 20, scope: !4)
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.unroll.full"}

From ad2086658df181369a09ad69dac260a41dbab814 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Wed, 3 Feb 2021 20:57:59 -0500
Subject: [PATCH 058/244] [OpenMP][NVPTX] Take functions in `deviceRTLs` as
 `convergent`

OpenMP device compiler (similar to other SPMD compilers) assumes that
functions are convergent by default to avoid invalid transformations, such as
the bug (https://bugs.llvm.org/show_bug.cgi?id=49021).

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95971

(cherry picked from commit 0f0ce3c12edefd25448e39c4d20718a10d3d42c1)
---
 clang/lib/Frontend/CompilerInvocation.cpp     |  2 +
 .../OpenMP/target_attribute_convergent.cpp    | 13 +++
 .../libomptarget/test/offloading/bug49021.cpp | 85 +++++++++++++++++++
 3 files changed, 100 insertions(+)
 create mode 100644 clang/test/OpenMP/target_attribute_convergent.cpp
 create mode 100644 openmp/libomptarget/test/offloading/bug49021.cpp

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index d8be4ea14868..036388ebd355 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2470,6 +2470,8 @@ void CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
   bool IsTargetSpecified =
       Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
 
+  Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice;
+
   if (Opts.OpenMP || Opts.OpenMPSimd) {
     if (int Version = getLastArgIntValue(
             Args, OPT_fopenmp_version_EQ,
diff --git a/clang/test/OpenMP/target_attribute_convergent.cpp b/clang/test/OpenMP/target_attribute_convergent.cpp
new file mode 100644
index 000000000000..932214e987c8
--- /dev/null
+++ b/clang/test/OpenMP/target_attribute_convergent.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s
+// expected-no-diagnostics
+
+#pragma omp declare target
+
+void foo() {}
+
+#pragma omp end declare target
+
+// CHECK: Function Attrs: {{.*}}convergent{{.*}}
+// CHECK: define hidden void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]]
+// CHECK: attributes [[ATTRIBUTE_NUMBER]] = { {{.*}}convergent{{.*}} }
diff --git a/openmp/libomptarget/test/offloading/bug49021.cpp b/openmp/libomptarget/test/offloading/bug49021.cpp
new file mode 100644
index 000000000000..bcdbf68b10e0
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/bug49021.cpp
@@ -0,0 +1,85 @@
+// RUN: %libomptarget-compilexx-aarch64-unknown-linux-gnu -O3 && %libomptarget-run-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compilexx-powerpc64-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-powerpc64le-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-x86_64-pc-linux-gnu -O3 && %libomptarget-run-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda -O3 && %libomptarget-run-nvptx64-nvidia-cuda
+
+#include <iostream>
+
+template <typename T> int test_map() {
+  std::cout << "map(complex<>)" << std::endl;
+  T a(0.2), a_check;
+#pragma omp target map(from : a_check)
+  { a_check = a; }
+
+  if (a_check != a) {
+    std::cout << " wrong results";
+    return 1;
+  }
+
+  return 0;
+}
+
+template <typename T> int test_reduction() {
+  std::cout << "flat parallelism" << std::endl;
+  T sum(0), sum_host(0);
+  const int size = 100;
+  T array[size];
+  for (int i = 0; i < size; i++) {
+    array[i] = i;
+    sum_host += array[i];
+  }
+
+#pragma omp target teams distribute parallel for map(to: array[:size])         \
+                                                 reduction(+ : sum)
+  for (int i = 0; i < size; i++)
+    sum += array[i];
+
+  if (sum != sum_host)
+    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
+
+  std::cout << "hierarchical parallelism" << std::endl;
+  const int nblock(10), block_size(10);
+  T block_sum[nblock];
+#pragma omp target teams distribute map(to                                     \
+                                        : array[:size])                        \
+    map(from                                                                   \
+        : block_sum[:nblock])
+  for (int ib = 0; ib < nblock; ib++) {
+    T partial_sum = 0;
+    const int istart = ib * block_size;
+    const int iend = (ib + 1) * block_size;
+#pragma omp parallel for reduction(+ : partial_sum)
+    for (int i = istart; i < iend; i++)
+      partial_sum += array[i];
+    block_sum[ib] = partial_sum;
+  }
+
+  sum = 0;
+  for (int ib = 0; ib < nblock; ib++) {
+    sum += block_sum[ib];
+  }
+
+  if (sum != sum_host) {
+    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
+
+template <typename T> int test_complex() {
+  int ret = 0;
+  ret |= test_map<T>();
+  ret |= test_reduction<T>();
+  return ret;
+}
+
+int main() {
+  int ret = 0;
+  std::cout << "Testing float" << std::endl;
+  ret |= test_complex<float>();
+  std::cout << "Testing double" << std::endl;
+  ret |= test_complex<double>();
+  return ret;
+}

From e8cdcaeae406527c9a76b3dc5c522391c81dfdfd Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 1 Feb 2021 10:56:09 -0800
Subject: [PATCH 059/244] [X86] Accept 64-bit GPRs for vextractps when using a
 register that requires EVEX.

This is consistent with the VEX version. It also fixes a sorting
issue in the matching table that caused the EVEX version to be
prioritized over VEX in intel syntax.

Fixes issue [2] from PR48991.

(cherry picked from commit c691fe14da93a7c9eff466231515d6d4d16124fa)
---
 llvm/lib/Target/X86/X86InstrAVX512.td             | 4 ++--
 llvm/test/MC/X86/intel-syntax-x86-64-avx.s        | 4 ++++
 llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s | 3 +++
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0c2b278fdd7b..19012797ae9a 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
 
 // vextractps - extract 32 bits from XMM
-def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
+def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
       (ins VR128X:$src1, u8imm:$src2),
       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
+      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
       EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
 
 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
diff --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s
index bb57cb287f38..c1f20d204a8c 100644
--- a/llvm/test/MC/X86/intel-syntax-x86-64-avx.s
+++ b/llvm/test/MC/X86/intel-syntax-x86-64-avx.s
@@ -167,3 +167,7 @@
 // CHECK: vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910]
 // CHECK: encoding: [0xc4,0xa1,0x6d,0xf5,0x8c,0xf1,0x02,0x00,0x00,0xe0]
           vpmaddwd ymm1, ymm2, ymmword ptr [rcx + 8*r14 - 536870910]
+
+// CHECK: vextractps ecx, xmm2, 1
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd1,0x01]
+          vextractps ecx, xmm2, 1
diff --git a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
index 29bde03c5860..31c43afe5017 100644
--- a/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
+++ b/llvm/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
@@ -1260,3 +1260,6 @@
 // CHECK: encoding: [0x62,0xf1,0x7e,0x89,0xe6,0x11]
           vcvtdq2pd xmm2 {k1} {z}, qword ptr [rcx]
 
+// CHECK: vextractps ecx, xmm17, 1
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x17,0xc9,0x01]
+          vextractps rcx, xmm17, 1

From 7fad20eccc4f9fe5d03b2e381e26e8eb13a3e3be Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 4 Feb 2021 08:44:20 -0500
Subject: [PATCH 060/244] Revert "[OpenMP] Disabled profiling in `libomp` by
 default to unblock link errors"

This reverts commit f5602e0bf31ab590da19fa357980a753dbfd666e.
---
 openmp/CMakeLists.txt                 |  6 ------
 openmp/docs/design/Runtimes.rst       |  5 +----
 openmp/runtime/CMakeLists.txt         |  6 +++---
 openmp/runtime/src/CMakeLists.txt     | 12 +-----------
 openmp/runtime/src/kmp_config.h.cmake |  4 ++--
 openmp/runtime/src/kmp_runtime.cpp    |  6 +++---
 6 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index 4787d4b5a321..67600bebdafb 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -86,12 +86,6 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading."
        ${ENABLE_LIBOMPTARGET})
 option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget."
        ${ENABLE_LIBOMPTARGET})
-option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF)
-
-# Build host runtime library, after LIBOMPTARGET variables are set since they are needed
-# to enable time profiling support in the OpenMP runtime.
-add_subdirectory(runtime)
-
 if (OPENMP_ENABLE_LIBOMPTARGET)
   # Check that the library can actually be built.
   if (APPLE OR WIN32)
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index ad36e43eccdc..016b88ba324b 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -48,10 +48,7 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on
 `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_
 for time trace output. Using this library is enabled by default when building
 using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will
-be saved to the filename specified by the environment variable. For multi-threaded
-applications, profiling in ``libomp`` is also needed. Setting the CMake option
-``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will
-turn ``libomp`` into a C++ library.
+be saved to the filename specified by the environment variable.
 
 .. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool
 
diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 8828ff8ef455..9fdd04f41646 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -34,6 +34,7 @@ if(${OPENMP_STANDALONE_BUILD})
   # Should assertions be enabled?  They are on by default.
   set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
     "enable assertions?")
+  set(LIBOMPTARGET_PROFILING_SUPPORT FALSE)
 else() # Part of LLVM build
   # Determine the native architecture from LLVM.
   string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH)
@@ -65,11 +66,10 @@ else() # Part of LLVM build
     libomp_get_architecture(LIBOMP_ARCH)
   endif ()
   set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS})
+  # Time profiling support
+  set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING})
 endif()
 
-# Time profiling support
-set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING})
-
 # FUJITSU A64FX is a special processor because its cache line size is 256.
 # We need to pass this information into kmp_config.h.
 if(LIBOMP_ARCH STREQUAL "aarch64")
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index 822f9ca2b825..2e927df84f5c 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -50,14 +50,6 @@ if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
 endif()
 
-# Building with time profiling support requires LLVM directory includes.
-if(LIBOMP_PROFILING_SUPPORT)
-  include_directories(
-    ${LLVM_MAIN_INCLUDE_DIR}
-    ${LLVM_INCLUDE_DIR}
-  )
-endif()
-
 # Getting correct source files to build library
 set(LIBOMP_CXXFILES)
 set(LIBOMP_ASMFILES)
@@ -143,7 +135,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS)
 
 libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS)
 # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled.
-if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING))
+if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING))
   add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES})
   # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS
   target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS})
@@ -152,8 +144,6 @@ else()
     LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}
     LINK_COMPONENTS Support
     )
-  # libomp must be a C++ library such that it can link libLLVMSupport
-  set(LIBOMP_LINKER_LANGUAGE CXX)
 endif()
 
 set_target_properties(omp PROPERTIES
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index f6aee7197ee8..3d682c690fc7 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -44,8 +44,8 @@
 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG
 #cmakedefine01 LIBOMP_OMPT_SUPPORT
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
-#cmakedefine01 LIBOMP_PROFILING_SUPPORT
-#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT
+#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT
+#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT
 #cmakedefine01 LIBOMP_OMPT_OPTIONAL
 #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
 #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index a6e32bd008e1..4a0634d59cff 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -32,7 +32,7 @@
 #include "ompt-specific.h"
 #endif
 
-#if OMP_PROFILING_SUPPORT
+#if OMPTARGET_PROFILING_SUPPORT
 #include "llvm/Support/TimeProfiler.h"
 static char *ProfileTraceFile = nullptr;
 #endif
@@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
 /* ------------------------------------------------------------------------ */
 
 void *__kmp_launch_thread(kmp_info_t *this_thr) {
-#if OMP_PROFILING_SUPPORT
+#if OMPTARGET_PROFILING_SUPPORT
   ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
   // TODO: add a configuration option for time granularity
   if (ProfileTraceFile)
@@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
   KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
   KMP_MB();
 
-#if OMP_PROFILING_SUPPORT
+#if OMPTARGET_PROFILING_SUPPORT
   llvm::timeTraceProfilerFinishThread();
 #endif
   return this_thr;

From bc2dad1671598a87423c61c355d03db49ce76907 Mon Sep 17 00:00:00 2001
From: Peter Waller <peter.waller@arm.com>
Date: Tue, 26 Jan 2021 11:55:24 +0000
Subject: [PATCH 061/244] [clang][aarch64][WOA64][docs] Release note for
 longjmp crash with /guard:cf

Add a release note workaround for PR47463.

Bug: https://bugs.llvm.org/show_bug.cgi?id=47463

Differential Revision: https://reviews.llvm.org/D95435
---
 clang/docs/ReleaseNotes.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9efd4c01f053..c17d84de320c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -153,6 +153,11 @@ Windows Support
 - Implicitly add ``.exe`` suffix for MinGW targets, even when cross compiling.
   (This matches a change from GCC 8.)
 
+- Windows on Arm64: programs using the C standard library's setjmp and longjmp
+  functions may crash with a "Security check failure or stack buffer overrun"
+  exception. To workaround (with reduced security), compile with
+  /guard:cf,nolongjmp.
+
 C Language Changes in Clang
 ---------------------------
 

From 66c7b449acf402bdc87b69db5778b7b43958d217 Mon Sep 17 00:00:00 2001
From: Giorgis Georgakoudis <georgakoudis1@llnl.gov>
Date: Mon, 25 Jan 2021 14:10:50 -0800
Subject: [PATCH 062/244] [OpenMP] Fix building using LLVM_ENABLE_RUNTIMES

Fix the LLVM_ENABLE_RUNTIMES build when time profiling is enabled.

Related to: D94855

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95398

(cherry picked from commit bb40e6731843de92f1c73ad6efceb8a89e045ea6)
---
 openmp/CMakeLists.txt             | 10 +++++-----
 openmp/runtime/src/CMakeLists.txt |  9 +++++++++
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index 67600bebdafb..f89857dc98d6 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -55,11 +55,6 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING
 set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING
   "OpenMP compiler flag to use for testing OpenMP runtime libraries.")
 
-
-# Build host runtime library.
-add_subdirectory(runtime)
-
-
 set(ENABLE_LIBOMPTARGET ON)
 # Currently libomptarget cannot be compiled on Windows or MacOS X.
 # Since the device plugins are only supported on Linux anyway,
@@ -86,6 +81,11 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading."
        ${ENABLE_LIBOMPTARGET})
 option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget."
        ${ENABLE_LIBOMPTARGET})
+
+# Build host runtime library, after LIBOMPTARGET variables are set since they are needed
+# to enable time profiling support in the OpenMP runtime.
+add_subdirectory(runtime)
+
 if (OPENMP_ENABLE_LIBOMPTARGET)
   # Check that the library can actually be built.
   if (APPLE OR WIN32)
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index 2e927df84f5c..9c5dba55b705 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -50,6 +50,15 @@ if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
 endif()
 
+# Building with time profiling support for libomptarget requires
+# LLVM directory includes.
+if(LIBOMPTARGET_PROFILING_SUPPORT)
+  include_directories(
+    ${LLVM_MAIN_INCLUDE_DIR}
+    ${LLVM_INCLUDE_DIR}
+  )
+endif()
+
 # Getting correct source files to build library
 set(LIBOMP_CXXFILES)
 set(LIBOMP_ASMFILES)

From 92a5106e8055bab7da46095a832904444862728b Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 28 Jan 2021 07:24:19 -0500
Subject: [PATCH 063/244] [OpenMP] Disabled profiling in `libomp` by default to
 unblock link errors

A link error occurs when time profiling in `libomp` is enabled by default,
because `libomp` is assumed to be a C library but `libLLVMSupport`, which the
profiling depends on, is a C++ library. Currently the issue blocks all
OpenMP tests in Phabricator.

This patch adds a new CMake option `OPENMP_ENABLE_LIBOMP_PROFILING` to
enable/disable the feature. By default it is disabled. Note that once time
profiling is enabled for `libomp`, it becomes a C++ library.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95585

(cherry picked from commit c571b168349fdf22d1dc8b920bcffa3d5161f0a2)
---
 openmp/CMakeLists.txt                 | 1 +
 openmp/docs/design/Runtimes.rst       | 5 ++++-
 openmp/runtime/CMakeLists.txt         | 6 +++---
 openmp/runtime/src/CMakeLists.txt     | 9 +++++----
 openmp/runtime/src/kmp_config.h.cmake | 4 ++--
 openmp/runtime/src/kmp_runtime.cpp    | 6 +++---
 6 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index f89857dc98d6..b8a2822877e3 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -81,6 +81,7 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading."
        ${ENABLE_LIBOMPTARGET})
 option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget."
        ${ENABLE_LIBOMPTARGET})
+option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF)
 
 # Build host runtime library, after LIBOMPTARGET variables are set since they are needed
 # to enable time profiling support in the OpenMP runtime.
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 016b88ba324b..ad36e43eccdc 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -48,7 +48,10 @@ similar to Clang's ``-ftime-trace`` option. This generates a JSON file based on
 `Speedscope App`_. Building this feature depends on the `LLVM Support Library`_
 for time trace output. Using this library is enabled by default when building
 using the CMake option ``OPENMP_ENABLE_LIBOMPTARGET_PROFILING``. The output will
-be saved to the filename specified by the environment variable.
+be saved to the filename specified by the environment variable. For multi-threaded
+applications, profiling in ``libomp`` is also needed. Setting the CMake option
+``OPENMP_ENABLE_LIBOMP_PROFILING=ON`` to enable the feature. Note that this will
+turn ``libomp`` into a C++ library.
 
 .. _`Chrome Tracing`: https://www.chromium.org/developers/how-tos/trace-event-profiling-tool
 
diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 9fdd04f41646..8828ff8ef455 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -34,7 +34,6 @@ if(${OPENMP_STANDALONE_BUILD})
   # Should assertions be enabled?  They are on by default.
   set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL
     "enable assertions?")
-  set(LIBOMPTARGET_PROFILING_SUPPORT FALSE)
 else() # Part of LLVM build
   # Determine the native architecture from LLVM.
   string(TOLOWER "${LLVM_TARGET_ARCH}" LIBOMP_NATIVE_ARCH)
@@ -66,10 +65,11 @@ else() # Part of LLVM build
     libomp_get_architecture(LIBOMP_ARCH)
   endif ()
   set(LIBOMP_ENABLE_ASSERTIONS ${LLVM_ENABLE_ASSERTIONS})
-  # Time profiling support
-  set(LIBOMPTARGET_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMPTARGET_PROFILING})
 endif()
 
+# Time profiling support
+set(LIBOMP_PROFILING_SUPPORT ${OPENMP_ENABLE_LIBOMP_PROFILING})
+
 # FUJITSU A64FX is a special processor because its cache line size is 256.
 # We need to pass this information into kmp_config.h.
 if(LIBOMP_ARCH STREQUAL "aarch64")
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index 9c5dba55b705..822f9ca2b825 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -50,9 +50,8 @@ if(${LIBOMP_USE_HWLOC})
   include_directories(${LIBOMP_HWLOC_INSTALL_DIR}/include)
 endif()
 
-# Building with time profiling support for libomptarget requires
-# LLVM directory includes.
-if(LIBOMPTARGET_PROFILING_SUPPORT)
+# Building with time profiling support requires LLVM directory includes.
+if(LIBOMP_PROFILING_SUPPORT)
   include_directories(
     ${LLVM_MAIN_INCLUDE_DIR}
     ${LLVM_INCLUDE_DIR}
@@ -144,7 +143,7 @@ libomp_get_ldflags(LIBOMP_CONFIGURED_LDFLAGS)
 
 libomp_get_libflags(LIBOMP_CONFIGURED_LIBFLAGS)
 # Build libomp library. Add LLVMSupport dependency if building in-tree with libomptarget profiling enabled.
-if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMPTARGET_PROFILING))
+if(OPENMP_STANDALONE_BUILD OR (NOT OPENMP_ENABLE_LIBOMP_PROFILING))
   add_library(omp ${LIBOMP_LIBRARY_KIND} ${LIBOMP_SOURCE_FILES})
   # Linking command will include libraries in LIBOMP_CONFIGURED_LIBFLAGS
   target_link_libraries(omp ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS})
@@ -153,6 +152,8 @@ else()
     LINK_LIBS ${LIBOMP_CONFIGURED_LIBFLAGS} ${CMAKE_DL_LIBS}
     LINK_COMPONENTS Support
     )
+  # libomp must be a C++ library such that it can link libLLVMSupport
+  set(LIBOMP_LINKER_LANGUAGE CXX)
 endif()
 
 set_target_properties(omp PROPERTIES
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index 3d682c690fc7..f6aee7197ee8 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -44,8 +44,8 @@
 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG
 #cmakedefine01 LIBOMP_OMPT_SUPPORT
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
-#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT
-#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT
+#cmakedefine01 LIBOMP_PROFILING_SUPPORT
+#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT
 #cmakedefine01 LIBOMP_OMPT_OPTIONAL
 #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
 #cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 4a0634d59cff..a6e32bd008e1 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -32,7 +32,7 @@
 #include "ompt-specific.h"
 #endif
 
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
 #include "llvm/Support/TimeProfiler.h"
 static char *ProfileTraceFile = nullptr;
 #endif
@@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
 /* ------------------------------------------------------------------------ */
 
 void *__kmp_launch_thread(kmp_info_t *this_thr) {
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
   ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
   // TODO: add a configuration option for time granularity
   if (ProfileTraceFile)
@@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
   KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
   KMP_MB();
 
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
   llvm::timeTraceProfilerFinishThread();
 #endif
   return this_thr;

From 72f12467ded52160d52025e13a6217f00fe25f68 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@chromium.org>
Date: Thu, 4 Feb 2021 13:26:59 +0100
Subject: [PATCH 064/244] Add a release note about deprecating the clang-cl
 /fallback flag

As discussed in
https://lists.llvm.org/pipermail/cfe-dev/2021-January/067524.html

The flag has been removed on the main branch in D95876.

Differential revision: https://reviews.llvm.org/D96016
---
 clang/docs/ReleaseNotes.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c17d84de320c..f4ca8a855142 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -109,6 +109,10 @@ Deprecated Compiler Flags
 The following options are deprecated and ignored. They will be removed in
 future versions of Clang.
 
+- The clang-cl ``/fallback`` flag, which made clang-cl invoke Microsoft Visual
+  C++ on files it couldn't compile itself, has been deprecated. It will be
+  removed in Clang 13.
+
 - ...
 
 Modified Compiler Flags

From 4e7933905578456a30b281bbbe832d8d938feed0 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 5 Feb 2021 01:40:33 +0000
Subject: [PATCH 065/244] workflows: Update libclang-abi-tests to work with
 minor release baselines

---
 .github/workflows/libclang-abi-tests.yml | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml
index 5681c7c8166e..320a88c1d407 100644
--- a/.github/workflows/libclang-abi-tests.yml
+++ b/.github/workflows/libclang-abi-tests.yml
@@ -20,6 +20,7 @@ jobs:
       ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }}
       ABI_LIBS: ${{ steps.vars.outputs.ABI_LIBS }}
       BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }}
+      BASELINE_VERSION_MINOR: ${{ steps.vars.outputs.BASELINE_VERSION_MINOR }}
       LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
       LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }}
       LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }}
@@ -36,16 +37,35 @@ jobs:
       - name: Setup Variables
         id: vars
         run: |
+          minor_version=0
+          remote_repo='https://github.com/llvm/llvm-project'
           if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then
-            echo ::set-output name=BASELINE_VERSION_MAJOR::$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))
+            major_version=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))
+            baseline_ref="$major_version.0.0"
+
+            # If there is a minor release, we want to use that as the base line.
+            minor_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9] | tail -n1 | grep -o 'llvmorg-.\+' || true`
+            if [ -n "$minor_ref" ]; then
+               baseline_ref=$minor_ref
+            else
+              # Check if we have a release candidate
+              rc_ref=`git ls-remote --refs -t $remote_repo llvmorg-$major_version.[1-9].[0-9]-rc* | tail -n1 | grep -o 'llvmorg-.\+' || true`
+              if [ -n "$rc_ref" ]; then
+                baseline_ref=$rc_ref
+              fi
+            fi
+            echo ::set-output name=BASELINE_VERSION_MAJOR::$major_version
+            echo ::set-output name=BASELINE_REF::$baseline_ref
             echo ::set-output name=ABI_HEADERS::clang-c
             echo ::set-output name=ABI_LIBS::libclang.so
           else
             echo ::set-output name=BASELINE_VERSION_MAJOR::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}
+            echo ::set-output name=BASELINE_REF::${{ steps.version.outputs.LLVM_VERSION_MAJOR }}.0.0
             echo ::set-output name=ABI_HEADERS::.
             echo ::set-output name=ABI_LIBS::libclang.so libclang-cpp.so
           fi
 
+
   abi-dump:
     needs: abi-dump-setup
     runs-on: ubuntu-latest
@@ -57,7 +77,7 @@ jobs:
         include:
           - name: build-baseline
             llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}
-            ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0
+            ref: ${{ needs.abi-dump-setup.outputs.BASELINE_REF }}
             repo: llvm/llvm-project
           - name: build-latest
             llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }}

From 81febec8a327ecbe83575ac280c2931718ab5e33 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Fri, 29 Jan 2021 12:56:23 +0100
Subject: [PATCH 066/244] [MemCpyOpt] Add test for incorrect optimization
 across lifetime (NFC)

This only affects the MemorySSA-based implementation.
---
 llvm/test/Transforms/MemCpyOpt/lifetime.ll | 43 ++++++++++++++++++++--
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll
index 1d2b699ee96d..5dc13ca10054 100644
--- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll
+++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
-; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA
+; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA
 
 ; performCallSlotOptzn in MemCpy should not exchange the calls to
 ; @llvm.lifetime.start and @llvm.memcpy.
@@ -9,8 +9,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i
 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
 
-define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) {
-; CHECK-LABEL: @_ZN4CordC2EOS_(
+define void @call_slot(i8* nocapture dereferenceable(16) %arg1) {
+; CHECK-LABEL: @call_slot(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[ARG1:%.*]], i64 7
 ; CHECK-NEXT:    store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1
@@ -27,4 +27,39 @@ bb:
   ret void
 }
 
+; FIXME: Miscompile.
+define void @memcpy_memcpy_across_lifetime(i8* noalias %p1, i8* noalias %p2, i8* noalias %p3) {
+; NO_MSSA-LABEL: @memcpy_memcpy_across_lifetime(
+; NO_MSSA-NEXT:    [[A:%.*]] = alloca [16 x i8], align 1
+; NO_MSSA-NEXT:    [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0
+; NO_MSSA-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]])
+; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false)
+; NO_MSSA-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false)
+; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false)
+; NO_MSSA-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]])
+; NO_MSSA-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false)
+; NO_MSSA-NEXT:    ret void
+;
+; MSSA-LABEL: @memcpy_memcpy_across_lifetime(
+; MSSA-NEXT:    [[A:%.*]] = alloca [16 x i8], align 1
+; MSSA-NEXT:    [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0
+; MSSA-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]])
+; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false)
+; MSSA-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false)
+; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false)
+; MSSA-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]])
+; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false)
+; MSSA-NEXT:    ret void
+;
+  %a = alloca [16 x i8]
+  %a8 = bitcast [16 x i8]* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 16, i8* %a8)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a8, i8* %p1, i64 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p2, i8* %a8, i64 16, i1 false)
+  call void @llvm.lifetime.end.p0i8(i64 16, i8* %a8)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p3, i8* %p2, i64 16, i1 false)
+  ret void
+}
+
 attributes #1 = { argmemonly nounwind }

From 12a772b1a09a1b5c3f43d08c2804973506b8a859 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 31 Jan 2021 17:55:24 +0100
Subject: [PATCH 067/244] [MemorySSA] Don't treat lifetime.end as NoAlias

MemorySSA currently treats lifetime.end intrinsics as not aliasing
anything. This breaks MemorySSA-based MemCpyOpt, because we'll happily
move a read of a pointer below a lifetime.end intrinsic, as no clobber
is reported.

I think the MemorySSA modelling here isn't correct: lifetime.end(p)
has approximately the same effect as doing a memcpy(p, undef), and
should be treated as a clobber.

This patch removes the special handling of lifetime.end, leaving
alias analysis to handle it appropriately.

Differential Revision: https://reviews.llvm.org/D95763
---
 llvm/lib/Analysis/MemorySSA.cpp               | 26 --------------
 .../Analysis/MemorySSA/lifetime-simple.ll     |  9 +++--
 llvm/test/Transforms/MemCpyOpt/lifetime.ll    | 36 +++++++------------
 3 files changed, 16 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index 52dca7d378e1..4722b68e20e9 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -281,7 +281,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
     // clobbers where they don't really exist at all. Please see D43269 for
     // context.
     switch (II->getIntrinsicID()) {
-    case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start:
     case Intrinsic::invariant_end:
     case Intrinsic::assume:
@@ -358,22 +357,6 @@ struct UpwardsMemoryQuery {
 
 } // end anonymous namespace
 
-static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
-                           BatchAAResults &AA) {
-  Instruction *Inst = MD->getMemoryInst();
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-    switch (II->getIntrinsicID()) {
-    case Intrinsic::lifetime_end: {
-      MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
-      return AA.alias(ArgLoc, Loc) == MustAlias;
-    }
-    default:
-      return false;
-    }
-  }
-  return false;
-}
-
 template <typename AliasAnalysisType>
 static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
                                                    const Instruction *I) {
@@ -1465,15 +1448,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
       }
 
       MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
-      // If the lifetime of the pointer ends at this instruction, it's live on
-      // entry.
-      if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
-        // Reset UpperBound to liveOnEntryDef's place in the stack
-        UpperBound = 0;
-        FoundClobberResult = true;
-        LocInfo.AR = MustAlias;
-        break;
-      }
       ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA);
       if (CA.IsClobber) {
         FoundClobberResult = true;
diff --git a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll
index 33327c5539f6..2d0481c18415 100644
--- a/llvm/test/Analysis/MemorySSA/lifetime-simple.ll
+++ b/llvm/test/Analysis/MemorySSA/lifetime-simple.ll
@@ -1,8 +1,7 @@
 ; RUN: opt -basic-aa -print-memoryssa -verify-memoryssa -enable-new-pm=0 -analyze < %s 2>&1 | FileCheck %s
 ; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
-; This test checks a number of things:
-; First, the lifetime markers should not clobber any uses of Q or P.
-; Second, the loads of P are MemoryUse(LiveOnEntry) due to the placement of the markers vs the loads.
+; This test checks that lifetime markers are considered clobbers of %P,
+; and due to lack of noalias information, of %Q as well.
 
 define i8 @test(i8* %P, i8* %Q) {
 entry:
@@ -18,10 +17,10 @@ entry:
 ; CHECK:  3 = MemoryDef(2)
 ; CHECK-NEXT:   call void @llvm.lifetime.end.p0i8(i64 32, i8* %P)
   call void @llvm.lifetime.end.p0i8(i64 32, i8* %P)
-; CHECK:  MemoryUse(liveOnEntry)
+; CHECK:  MemoryUse(3)
 ; CHECK-NEXT:   %1 = load i8, i8* %P
   %1 = load i8, i8* %P
-; CHECK:  MemoryUse(2)
+; CHECK:  MemoryUse(3)
 ; CHECK-NEXT:   %2 = load i8, i8* %Q
   %2 = load i8, i8* %Q
   ret i8 %1
diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll
index 5dc13ca10054..c7e7666307ab 100644
--- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll
+++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA
-; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA
+; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
+; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
 
 ; performCallSlotOptzn in MemCpy should not exchange the calls to
 ; @llvm.lifetime.start and @llvm.memcpy.
@@ -27,29 +27,17 @@ bb:
   ret void
 }
 
-; FIXME: Miscompile.
 define void @memcpy_memcpy_across_lifetime(i8* noalias %p1, i8* noalias %p2, i8* noalias %p3) {
-; NO_MSSA-LABEL: @memcpy_memcpy_across_lifetime(
-; NO_MSSA-NEXT:    [[A:%.*]] = alloca [16 x i8], align 1
-; NO_MSSA-NEXT:    [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0
-; NO_MSSA-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]])
-; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false)
-; NO_MSSA-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false)
-; NO_MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false)
-; NO_MSSA-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]])
-; NO_MSSA-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false)
-; NO_MSSA-NEXT:    ret void
-;
-; MSSA-LABEL: @memcpy_memcpy_across_lifetime(
-; MSSA-NEXT:    [[A:%.*]] = alloca [16 x i8], align 1
-; MSSA-NEXT:    [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0
-; MSSA-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]])
-; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false)
-; MSSA-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false)
-; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false)
-; MSSA-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]])
-; MSSA-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false)
-; MSSA-NEXT:    ret void
+; CHECK-LABEL: @memcpy_memcpy_across_lifetime(
+; CHECK-NEXT:    [[A:%.*]] = alloca [16 x i8], align 1
+; CHECK-NEXT:    [[A8:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A]], i64 0, i64 0
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull [[A8]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[A8]], i8* nonnull align 1 dereferenceable(16) [[P1:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P1]], i8* nonnull align 1 dereferenceable(16) [[P2:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P2]], i8* nonnull align 1 dereferenceable(16) [[A8]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull [[A8]])
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(16) [[P3:%.*]], i8* nonnull align 1 dereferenceable(16) [[P2]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
 ;
   %a = alloca [16 x i8]
   %a8 = bitcast [16 x i8]* %a to i8*

From 716eef9ad5b367e5cbcc22c8ac53395f9bdbe7a5 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 4 Feb 2021 20:14:14 -0500
Subject: [PATCH 068/244] [OpenMP][libomptarget] Fixed an issue that device
 sync is skipped if the kernel doesn't have any argument

Currently if there is no kernel argument, device synchronization will
be skipped. This can lead to two issues:
1. If there is any device error, it will not be captured;
2. The target region might end before the kernel is done, which is not spec
   conformant.

The test added in this patch only runs on the NVPTX platform, although it will not
be executed by Phab at all. It also requires `not` which is not available on most
systems.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D96067

(cherry picked from commit b68a6b09e60a24733b923a0fc282746a855852da)
---
 openmp/libomptarget/src/omptarget.cpp         | 22 +++++++++++++++----
 .../libomptarget/test/offloading/assert.cpp   |  8 +++++++
 2 files changed, 26 insertions(+), 4 deletions(-)
 create mode 100644 openmp/libomptarget/test/offloading/assert.cpp

diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 90966d25fb26..e4b7b18bc70b 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -451,6 +451,17 @@ struct DeallocTgtPtrInfo {
       : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete),
         HasCloseModifier(HasCloseModifier) {}
 };
+
+/// Synchronize device
+static int syncDevice(DeviceTy &Device, __tgt_async_info *AsyncInfo) {
+  assert(AsyncInfo && AsyncInfo->Queue && "Invalid AsyncInfo");
+  if (Device.synchronize(AsyncInfo) != OFFLOAD_SUCCESS) {
+    REPORT("Failed to synchronize device.\n");
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
 } // namespace
 
 /// Internal function to undo the mapping and retrieve the data from the device.
@@ -631,11 +642,9 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
   // AsyncInfo->Queue will not be nullptr, so again, we don't need to
   // synchronize.
   if (AsyncInfo && AsyncInfo->Queue) {
-    Ret = Device.synchronize(AsyncInfo);
-    if (Ret != OFFLOAD_SUCCESS) {
-      REPORT("Failed to synchronize device.\n");
+    Ret = syncDevice(Device, AsyncInfo);
+    if (Ret != OFFLOAD_SUCCESS)
       return OFFLOAD_FAIL;
-    }
   }
 
   // Deallocate target pointer
@@ -1307,6 +1316,11 @@ int target(ident_t *loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
       REPORT("Failed to process data after launching the kernel.\n");
       return OFFLOAD_FAIL;
     }
+  } else if (AsyncInfo.Queue) {
+    // If ArgNum is zero, but AsyncInfo.Queue is valid, then the kernel doesn't
+    // have any argument, and the device supports async operations, so we need a
+    // sync at this point.
+    return syncDevice(Device, &AsyncInfo);
   }
 
   return OFFLOAD_SUCCESS;
diff --git a/openmp/libomptarget/test/offloading/assert.cpp b/openmp/libomptarget/test/offloading/assert.cpp
new file mode 100644
index 000000000000..00112dd92cc6
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/assert.cpp
@@ -0,0 +1,8 @@
+// RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda && %libomptarget-run-fail-nvptx64-nvidia-cuda
+
+int main(int argc, char *argv[]) {
+#pragma omp target
+  { __builtin_trap(); }
+
+  return 0;
+}

From 395ef8d5c67905646b72dd5ef2d8eb60cabb8634 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 2 Feb 2021 16:58:38 -0500
Subject: [PATCH 069/244] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20Rename=20inclu?=
 =?UTF-8?q?de/support=20to=20include/=5F=5Fsupport?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We do ship those headers, so the directory name should not be something
that can potentially conflict with user-defined directories.

This is a cherry-pick of b51756819a85563ae063e98eeb3d6af8e44c8f64.

Differential Revision: https://reviews.llvm.org/D96059
---
 libcxx/include/CMakeLists.txt                 | 38 +++++++++----------
 libcxx/include/__locale                       | 20 +++++-----
 .../android/locale_bionic.h                   |  6 +--
 .../{support => __support}/fuchsia/xlocale.h  |  6 +--
 .../{support => __support}/ibm/limits.h       |  2 +-
 .../ibm/locale_mgmt_aix.h                     |  2 +-
 .../{support => __support}/ibm/nanosleep.h    |  0
 .../{support => __support}/ibm/support.h      |  2 +-
 .../{support => __support}/ibm/xlocale.h      |  7 ++--
 .../{support => __support}/musl/xlocale.h     |  2 +-
 .../{support => __support}/newlib/xlocale.h   |  6 +--
 .../{support => __support}/nuttx/xlocale.h    |  6 +--
 .../{support => __support}/openbsd/xlocale.h  |  4 +-
 .../solaris/floatingpoint.h                   |  0
 .../{support => __support}/solaris/wchar.h    |  0
 .../{support => __support}/solaris/xlocale.h  |  0
 .../win32/limits_msvc_win32.h                 |  2 +-
 .../win32/locale_win32.h                      |  2 +-
 .../xlocale/__nop_locale_mgmt.h               |  2 +-
 .../xlocale/__posix_l_fallback.h              |  2 +-
 .../xlocale/__strtonum_fallback.h             |  2 +-
 libcxx/include/__threading_support            |  2 +-
 libcxx/include/bit                            |  2 +-
 libcxx/include/limits                         |  4 +-
 libcxx/src/CMakeLists.txt                     |  2 +-
 libcxx/src/locale.cpp                         |  2 +-
 libcxx/src/support/solaris/xlocale.cpp        |  2 +-
 libcxx/src/support/win32/locale_win32.cpp     |  2 +-
 libcxx/src/support/win32/support.cpp          |  2 +-
 libcxx/src/support/win32/thread_win32.cpp     |  2 +-
 .../gn/secondary/libcxx/include/BUILD.gn      | 38 +++++++++----------
 31 files changed, 85 insertions(+), 84 deletions(-)
 rename libcxx/include/{support => __support}/android/locale_bionic.h (90%)
 rename libcxx/include/{support => __support}/fuchsia/xlocale.h (74%)
 rename libcxx/include/{support => __support}/ibm/limits.h (97%)
 rename libcxx/include/{support => __support}/ibm/locale_mgmt_aix.h (96%)
 rename libcxx/include/{support => __support}/ibm/nanosleep.h (100%)
 rename libcxx/include/{support => __support}/ibm/support.h (95%)
 rename libcxx/include/{support => __support}/ibm/xlocale.h (97%)
 rename libcxx/include/{support => __support}/musl/xlocale.h (95%)
 rename libcxx/include/{support => __support}/newlib/xlocale.h (82%)
 rename libcxx/include/{support => __support}/nuttx/xlocale.h (70%)
 rename libcxx/include/{support => __support}/openbsd/xlocale.h (78%)
 rename libcxx/include/{support => __support}/solaris/floatingpoint.h (100%)
 rename libcxx/include/{support => __support}/solaris/wchar.h (100%)
 rename libcxx/include/{support => __support}/solaris/xlocale.h (100%)
 rename libcxx/include/{support => __support}/win32/limits_msvc_win32.h (96%)
 rename libcxx/include/{support => __support}/win32/locale_win32.h (99%)
 rename libcxx/include/{support => __support}/xlocale/__nop_locale_mgmt.h (94%)
 rename libcxx/include/{support => __support}/xlocale/__posix_l_fallback.h (98%)
 rename libcxx/include/{support => __support}/xlocale/__strtonum_fallback.h (96%)

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 77e5e556d684..29a317b8ae9a 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -150,25 +150,25 @@ set(files
   string.h
   string_view
   strstream
-  support/android/locale_bionic.h
-  support/fuchsia/xlocale.h
-  support/ibm/limits.h
-  support/ibm/locale_mgmt_aix.h
-  support/ibm/nanosleep.h
-  support/ibm/support.h
-  support/ibm/xlocale.h
-  support/musl/xlocale.h
-  support/newlib/xlocale.h
-  support/nuttx/xlocale.h
-  support/openbsd/xlocale.h
-  support/solaris/floatingpoint.h
-  support/solaris/wchar.h
-  support/solaris/xlocale.h
-  support/win32/limits_msvc_win32.h
-  support/win32/locale_win32.h
-  support/xlocale/__nop_locale_mgmt.h
-  support/xlocale/__posix_l_fallback.h
-  support/xlocale/__strtonum_fallback.h
+  __support/android/locale_bionic.h
+  __support/fuchsia/xlocale.h
+  __support/ibm/limits.h
+  __support/ibm/locale_mgmt_aix.h
+  __support/ibm/nanosleep.h
+  __support/ibm/support.h
+  __support/ibm/xlocale.h
+  __support/musl/xlocale.h
+  __support/newlib/xlocale.h
+  __support/nuttx/xlocale.h
+  __support/openbsd/xlocale.h
+  __support/solaris/floatingpoint.h
+  __support/solaris/wchar.h
+  __support/solaris/xlocale.h
+  __support/win32/limits_msvc_win32.h
+  __support/win32/locale_win32.h
+  __support/xlocale/__nop_locale_mgmt.h
+  __support/xlocale/__posix_l_fallback.h
+  __support/xlocale/__strtonum_fallback.h
   system_error
   tgmath.h
   thread
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index a2da7d78049f..77e5faab2676 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -21,30 +21,30 @@
 #include <locale.h>
 #if defined(_LIBCPP_MSVCRT_LIKE)
 # include <cstring>
-# include <support/win32/locale_win32.h>
+# include <__support/win32/locale_win32.h>
 #elif defined(__NuttX__)
-# include <support/nuttx/xlocale.h>
+# include <__support/nuttx/xlocale.h>
 #elif defined(_AIX) || defined(__MVS__)
-# include <support/ibm/xlocale.h>
+# include <__support/ibm/xlocale.h>
 #elif defined(__ANDROID__)
-# include <support/android/locale_bionic.h>
+# include <__support/android/locale_bionic.h>
 #elif defined(__sun__)
 # include <xlocale.h>
-# include <support/solaris/xlocale.h>
+# include <__support/solaris/xlocale.h>
 #elif defined(_NEWLIB_VERSION)
-# include <support/newlib/xlocale.h>
+# include <__support/newlib/xlocale.h>
 #elif defined(__OpenBSD__)
-# include <support/openbsd/xlocale.h>
+# include <__support/openbsd/xlocale.h>
 #elif (defined(__APPLE__)      || defined(__FreeBSD__) \
     || defined(__EMSCRIPTEN__) || defined(__IBMCPP__))
 # include <xlocale.h>
 #elif defined(__Fuchsia__)
-# include <support/fuchsia/xlocale.h>
+# include <__support/fuchsia/xlocale.h>
 #elif defined(__wasi__)
 // WASI libc uses musl's locales support.
-# include <support/musl/xlocale.h>
+# include <__support/musl/xlocale.h>
 #elif defined(_LIBCPP_HAS_MUSL_LIBC)
-# include <support/musl/xlocale.h>
+# include <__support/musl/xlocale.h>
 #endif
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/support/android/locale_bionic.h b/libcxx/include/__support/android/locale_bionic.h
similarity index 90%
rename from libcxx/include/support/android/locale_bionic.h
rename to libcxx/include/__support/android/locale_bionic.h
index f05a6a0522ca..8c6d4bd0dc32 100644
--- a/libcxx/include/support/android/locale_bionic.h
+++ b/libcxx/include/__support/android/locale_bionic.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===------------------- support/android/locale_bionic.h ------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -28,13 +28,13 @@ extern "C" {
 #include <android/api-level.h>
 #include <android/ndk-version.h>
 #if __ANDROID_API__ < 21
-#include <support/xlocale/__posix_l_fallback.h>
+#include <__support/xlocale/__posix_l_fallback.h>
 #endif
 // In NDK versions later than 16, locale-aware functions are provided by
 // legacy_stdlib_inlines.h
 #if __NDK_MAJOR__ <= 16
 #if __ANDROID_API__ < 21
-#include <support/xlocale/__strtonum_fallback.h>
+#include <__support/xlocale/__strtonum_fallback.h>
 #elif __ANDROID_API__ < 26
 
 #if defined(__cplusplus)
diff --git a/libcxx/include/support/fuchsia/xlocale.h b/libcxx/include/__support/fuchsia/xlocale.h
similarity index 74%
rename from libcxx/include/support/fuchsia/xlocale.h
rename to libcxx/include/__support/fuchsia/xlocale.h
index b86ce9efbd11..e8def81480ea 100644
--- a/libcxx/include/support/fuchsia/xlocale.h
+++ b/libcxx/include/__support/fuchsia/xlocale.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===------------------- support/fuchsia/xlocale.h ------------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,8 +14,8 @@
 
 #include <cstdlib>
 #include <cwchar>
-#include <support/xlocale/__posix_l_fallback.h>
-#include <support/xlocale/__strtonum_fallback.h>
+#include <__support/xlocale/__posix_l_fallback.h>
+#include <__support/xlocale/__strtonum_fallback.h>
 
 #endif // defined(__Fuchsia__)
 
diff --git a/libcxx/include/support/ibm/limits.h b/libcxx/include/__support/ibm/limits.h
similarity index 97%
rename from libcxx/include/support/ibm/limits.h
rename to libcxx/include/__support/ibm/limits.h
index d1c59f066a87..45f1f1e3684c 100644
--- a/libcxx/include/support/ibm/limits.h
+++ b/libcxx/include/__support/ibm/limits.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===--------------------- support/ibm/limits.h ---------------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/ibm/locale_mgmt_aix.h b/libcxx/include/__support/ibm/locale_mgmt_aix.h
similarity index 96%
rename from libcxx/include/support/ibm/locale_mgmt_aix.h
rename to libcxx/include/__support/ibm/locale_mgmt_aix.h
index e452dc32529d..4f658c3eee30 100644
--- a/libcxx/include/support/ibm/locale_mgmt_aix.h
+++ b/libcxx/include/__support/ibm/locale_mgmt_aix.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===------------------- support/ibm/locale_mgmt_aix.h --------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/ibm/nanosleep.h b/libcxx/include/__support/ibm/nanosleep.h
similarity index 100%
rename from libcxx/include/support/ibm/nanosleep.h
rename to libcxx/include/__support/ibm/nanosleep.h
diff --git a/libcxx/include/support/ibm/support.h b/libcxx/include/__support/ibm/support.h
similarity index 95%
rename from libcxx/include/support/ibm/support.h
rename to libcxx/include/__support/ibm/support.h
index 0569cbe7460d..a7751b017666 100644
--- a/libcxx/include/support/ibm/support.h
+++ b/libcxx/include/__support/ibm/support.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===----------------------- support/ibm/support.h ----------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/ibm/xlocale.h b/libcxx/include/__support/ibm/xlocale.h
similarity index 97%
rename from libcxx/include/support/ibm/xlocale.h
rename to libcxx/include/__support/ibm/xlocale.h
index fde137cde260..ad07a255fc95 100644
--- a/libcxx/include/support/ibm/xlocale.h
+++ b/libcxx/include/__support/ibm/xlocale.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===--------------------- support/ibm/xlocale.h -------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,8 @@
 
 #ifndef _LIBCPP_SUPPORT_IBM_XLOCALE_H
 #define _LIBCPP_SUPPORT_IBM_XLOCALE_H
-#include <support/ibm/locale_mgmt_aix.h>
+
+#include <__support/ibm/locale_mgmt_aix.h>
 
 #include "cstdlib"
 
@@ -218,7 +219,7 @@ size_t strftime_l(char *__s, size_t __size, const char *__fmt,
 #elif defined(__MVS__)
 #include <wctype.h>
 // POSIX routines
-#include <support/xlocale/__posix_l_fallback.h>
+#include <__support/xlocale/__posix_l_fallback.h>
 #endif // defined(__MVS__)
 
 // The following are not POSIX routines.  These are quick-and-dirty hacks
diff --git a/libcxx/include/support/musl/xlocale.h b/libcxx/include/__support/musl/xlocale.h
similarity index 95%
rename from libcxx/include/support/musl/xlocale.h
rename to libcxx/include/__support/musl/xlocale.h
index 722d13fa1d66..2508a8e8e0ca 100644
--- a/libcxx/include/support/musl/xlocale.h
+++ b/libcxx/include/__support/musl/xlocale.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===------------------- support/musl/xlocale.h ------------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/newlib/xlocale.h b/libcxx/include/__support/newlib/xlocale.h
similarity index 82%
rename from libcxx/include/support/newlib/xlocale.h
rename to libcxx/include/__support/newlib/xlocale.h
index 25fa798b6d02..b75f9263a4c4 100644
--- a/libcxx/include/support/newlib/xlocale.h
+++ b/libcxx/include/__support/newlib/xlocale.h
@@ -17,9 +17,9 @@
 #include <ctype.h>
 #if !defined(__NEWLIB__) || __NEWLIB__ < 2 || \
     __NEWLIB__ == 2 && __NEWLIB_MINOR__ < 5
-#include <support/xlocale/__nop_locale_mgmt.h>
-#include <support/xlocale/__posix_l_fallback.h>
-#include <support/xlocale/__strtonum_fallback.h>
+#include <__support/xlocale/__nop_locale_mgmt.h>
+#include <__support/xlocale/__posix_l_fallback.h>
+#include <__support/xlocale/__strtonum_fallback.h>
 #endif
 
 #endif // _NEWLIB_VERSION
diff --git a/libcxx/include/support/nuttx/xlocale.h b/libcxx/include/__support/nuttx/xlocale.h
similarity index 70%
rename from libcxx/include/support/nuttx/xlocale.h
rename to libcxx/include/__support/nuttx/xlocale.h
index b70d62005046..be738e3b64e4 100644
--- a/libcxx/include/support/nuttx/xlocale.h
+++ b/libcxx/include/__support/nuttx/xlocale.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===-------------------- support/nuttx/xlocale.h -------------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,8 +11,8 @@
 #define _LIBCPP_SUPPORT_NUTTX_XLOCALE_H
 
 #if defined(__NuttX__)
-#include <support/xlocale/__posix_l_fallback.h>
-#include <support/xlocale/__strtonum_fallback.h>
+#include <__support/xlocale/__posix_l_fallback.h>
+#include <__support/xlocale/__strtonum_fallback.h>
 #endif // __NuttX__
 
 #endif
diff --git a/libcxx/include/support/openbsd/xlocale.h b/libcxx/include/__support/openbsd/xlocale.h
similarity index 78%
rename from libcxx/include/support/openbsd/xlocale.h
rename to libcxx/include/__support/openbsd/xlocale.h
index fbfaedd127c6..1136fa327fac 100644
--- a/libcxx/include/support/openbsd/xlocale.h
+++ b/libcxx/include/__support/openbsd/xlocale.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===-------------------- support/openbsd/xlocale.h -----------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,6 +14,6 @@
 #include <clocale>
 #include <cwctype>
 #include <ctype.h>
-#include <support/xlocale/__strtonum_fallback.h>
+#include <__support/xlocale/__strtonum_fallback.h>
 
 #endif
diff --git a/libcxx/include/support/solaris/floatingpoint.h b/libcxx/include/__support/solaris/floatingpoint.h
similarity index 100%
rename from libcxx/include/support/solaris/floatingpoint.h
rename to libcxx/include/__support/solaris/floatingpoint.h
diff --git a/libcxx/include/support/solaris/wchar.h b/libcxx/include/__support/solaris/wchar.h
similarity index 100%
rename from libcxx/include/support/solaris/wchar.h
rename to libcxx/include/__support/solaris/wchar.h
diff --git a/libcxx/include/support/solaris/xlocale.h b/libcxx/include/__support/solaris/xlocale.h
similarity index 100%
rename from libcxx/include/support/solaris/xlocale.h
rename to libcxx/include/__support/solaris/xlocale.h
diff --git a/libcxx/include/support/win32/limits_msvc_win32.h b/libcxx/include/__support/win32/limits_msvc_win32.h
similarity index 96%
rename from libcxx/include/support/win32/limits_msvc_win32.h
rename to libcxx/include/__support/win32/limits_msvc_win32.h
index 7bb835559a3b..758d24647b1b 100644
--- a/libcxx/include/support/win32/limits_msvc_win32.h
+++ b/libcxx/include/__support/win32/limits_msvc_win32.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===------------------ support/win32/limits_msvc_win32.h -----------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/win32/locale_win32.h b/libcxx/include/__support/win32/locale_win32.h
similarity index 99%
rename from libcxx/include/support/win32/locale_win32.h
rename to libcxx/include/__support/win32/locale_win32.h
index 897c36be70c6..d32a7a8ad304 100644
--- a/libcxx/include/support/win32/locale_win32.h
+++ b/libcxx/include/__support/win32/locale_win32.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===--------------------- support/win32/locale_win32.h -------------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/xlocale/__nop_locale_mgmt.h b/libcxx/include/__support/xlocale/__nop_locale_mgmt.h
similarity index 94%
rename from libcxx/include/support/xlocale/__nop_locale_mgmt.h
rename to libcxx/include/__support/xlocale/__nop_locale_mgmt.h
index f33d3894c3a9..57b18842ff45 100644
--- a/libcxx/include/support/xlocale/__nop_locale_mgmt.h
+++ b/libcxx/include/__support/xlocale/__nop_locale_mgmt.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===------------  support/xlocale/__nop_locale_mgmt.h -----------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/xlocale/__posix_l_fallback.h b/libcxx/include/__support/xlocale/__posix_l_fallback.h
similarity index 98%
rename from libcxx/include/support/xlocale/__posix_l_fallback.h
rename to libcxx/include/__support/xlocale/__posix_l_fallback.h
index f3df6c46fbab..00d69d19e8c8 100644
--- a/libcxx/include/support/xlocale/__posix_l_fallback.h
+++ b/libcxx/include/__support/xlocale/__posix_l_fallback.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===--------------- support/xlocale/__posix_l_fallback.h -----------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/support/xlocale/__strtonum_fallback.h b/libcxx/include/__support/xlocale/__strtonum_fallback.h
similarity index 96%
rename from libcxx/include/support/xlocale/__strtonum_fallback.h
rename to libcxx/include/__support/xlocale/__strtonum_fallback.h
index df38598056a6..1172a5d57236 100644
--- a/libcxx/include/support/xlocale/__strtonum_fallback.h
+++ b/libcxx/include/__support/xlocale/__strtonum_fallback.h
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===-------------- support/xlocale/__strtonum_fallback.h -----------------===//
+//===-----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support
index 473c9c3bbe49..de572f3ff84d 100644
--- a/libcxx/include/__threading_support
+++ b/libcxx/include/__threading_support
@@ -17,7 +17,7 @@
 #include <errno.h>
 
 #ifdef __MVS__
-# include <support/ibm/nanosleep.h>
+# include <__support/ibm/nanosleep.h>
 #endif
 
 #ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
diff --git a/libcxx/include/bit b/libcxx/include/bit
index fe360179c5ca..f8c37c3d6bbf 100644
--- a/libcxx/include/bit
+++ b/libcxx/include/bit
@@ -62,7 +62,7 @@ namespace std {
 #include <__debug>
 
 #if defined(__IBMCPP__)
-#include "support/ibm/support.h"
+#include "__support/ibm/support.h"
 #endif
 #if defined(_LIBCPP_COMPILER_MSVC)
 #include <intrin.h>
diff --git a/libcxx/include/limits b/libcxx/include/limits
index 6d5d1e1aca75..8f97cd10a8b1 100644
--- a/libcxx/include/limits
+++ b/libcxx/include/limits
@@ -105,11 +105,11 @@ template<> class numeric_limits<cv long double>;
 #include <type_traits>
 
 #if defined(_LIBCPP_COMPILER_MSVC)
-#include "support/win32/limits_msvc_win32.h"
+#include "__support/win32/limits_msvc_win32.h"
 #endif // _LIBCPP_MSVCRT
 
 #if defined(__IBMCPP__)
-#include "support/ibm/limits.h"
+#include "__support/ibm/limits.h"
 #endif // __IBMCPP__
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index c482068fa99a..9965104cb5b2 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -107,7 +107,7 @@ endif()
 if (LIBCXX_CONFIGURE_IDE)
   file(GLOB_RECURSE LIBCXX_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/*)
   if(WIN32)
-    file( GLOB LIBCXX_WIN32_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/support/win32/*.h)
+    file( GLOB LIBCXX_WIN32_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/../include/__support/win32/*.h)
     list(APPEND LIBCXX_HEADERS ${LIBCXX_WIN32_HEADERS})
   endif()
   # Force them all into the headers dir on MSVC, otherwise they end up at
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index f109389f68f3..a0209d0ce8cf 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -29,7 +29,7 @@
 #include "cwctype"
 #include "__sso_allocator"
 #if defined(_LIBCPP_MSVCRT) || defined(__MINGW32__)
-#include "support/win32/locale_win32.h"
+#include "__support/win32/locale_win32.h"
 #elif !defined(__BIONIC__) && !defined(__NuttX__)
 #include <langinfo.h>
 #endif
diff --git a/libcxx/src/support/solaris/xlocale.cpp b/libcxx/src/support/solaris/xlocale.cpp
index d68a39f4dfe5..d25adcd21d30 100644
--- a/libcxx/src/support/solaris/xlocale.cpp
+++ b/libcxx/src/support/solaris/xlocale.cpp
@@ -8,7 +8,7 @@
 
 #ifdef __sun__
 
-#include "support/solaris/xlocale.h"
+#include "__support/solaris/xlocale.h"
 #include <stdarg.h>
 #include <stdio.h>
 #include <sys/localedef.h>
diff --git a/libcxx/src/support/win32/locale_win32.cpp b/libcxx/src/support/win32/locale_win32.cpp
index b7062db352ad..e7c6005fc1a3 100644
--- a/libcxx/src/support/win32/locale_win32.cpp
+++ b/libcxx/src/support/win32/locale_win32.cpp
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===-------------------- support/win32/locale_win32.cpp ------------------===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/support/win32/support.cpp b/libcxx/src/support/win32/support.cpp
index d156e02e3e84..52453f547926 100644
--- a/libcxx/src/support/win32/support.cpp
+++ b/libcxx/src/support/win32/support.cpp
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===----------------------- support/win32/support.h ----------------------===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/support/win32/thread_win32.cpp b/libcxx/src/support/win32/thread_win32.cpp
index 83e7e9f6ce5b..35c4c871457d 100644
--- a/libcxx/src/support/win32/thread_win32.cpp
+++ b/libcxx/src/support/win32/thread_win32.cpp
@@ -1,5 +1,5 @@
 // -*- C++ -*-
-//===-------------------- support/win32/thread_win32.cpp ------------------===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 644f0a767558..2ca495b08fba 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -217,25 +217,25 @@ copy("include") {
     "string.h",
     "string_view",
     "strstream",
-    "support/android/locale_bionic.h",
-    "support/fuchsia/xlocale.h",
-    "support/ibm/limits.h",
-    "support/ibm/locale_mgmt_aix.h",
-    "support/ibm/nanosleep.h",
-    "support/ibm/support.h",
-    "support/ibm/xlocale.h",
-    "support/musl/xlocale.h",
-    "support/newlib/xlocale.h",
-    "support/nuttx/xlocale.h",
-    "support/openbsd/xlocale.h",
-    "support/solaris/floatingpoint.h",
-    "support/solaris/wchar.h",
-    "support/solaris/xlocale.h",
-    "support/win32/limits_msvc_win32.h",
-    "support/win32/locale_win32.h",
-    "support/xlocale/__nop_locale_mgmt.h",
-    "support/xlocale/__posix_l_fallback.h",
-    "support/xlocale/__strtonum_fallback.h",
+    "__support/android/locale_bionic.h",
+    "__support/fuchsia/xlocale.h",
+    "__support/ibm/limits.h",
+    "__support/ibm/locale_mgmt_aix.h",
+    "__support/ibm/nanosleep.h",
+    "__support/ibm/support.h",
+    "__support/ibm/xlocale.h",
+    "__support/musl/xlocale.h",
+    "__support/newlib/xlocale.h",
+    "__support/nuttx/xlocale.h",
+    "__support/openbsd/xlocale.h",
+    "__support/solaris/floatingpoint.h",
+    "__support/solaris/wchar.h",
+    "__support/solaris/xlocale.h",
+    "__support/win32/limits_msvc_win32.h",
+    "__support/win32/locale_win32.h",
+    "__support/xlocale/__nop_locale_mgmt.h",
+    "__support/xlocale/__posix_l_fallback.h",
+    "__support/xlocale/__strtonum_fallback.h",
     "system_error",
     "tgmath.h",
     "thread",

From bc39d53d9a4f1ed7c903648f3fd408296fd55c95 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Mon, 1 Feb 2021 15:18:42 -0800
Subject: [PATCH 070/244] =?UTF-8?q?[=F0=9F=8D=92]Disable=20CFI=20in=20=5F?=
 =?UTF-8?q?=5Fget=5Felem=20to=20allow=20casting=20a=20pointer=20to=20unini?=
 =?UTF-8?q?tialized=20memory?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes usage of shared_ptr with CFI enabled, which is llvm.org/pr48993.

(cherry pick of commit bab74864168bb5e28ecbc0294fe1095d8da7f569)

Differential Revision: https://reviews.llvm.org/D96063
---
 libcxx/include/memory | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/memory b/libcxx/include/memory
index a00916c8c03f..39d0f5bee6a5 100644
--- a/libcxx/include/memory
+++ b/libcxx/include/memory
@@ -2647,7 +2647,7 @@ private:
             _Alloc *__alloc = reinterpret_cast<_Alloc*>(__first);
             return __alloc;
         }
-        _Tp* __get_elem() _NOEXCEPT {
+        _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT {
             _CompressedPair *__as_pair = reinterpret_cast<_CompressedPair*>(__blob_);
             typename _CompressedPair::_Base2* __second = _CompressedPair::__get_second_base(__as_pair);
             _Tp *__elem = reinterpret_cast<_Tp*>(__second);

From 251f3295b498b699aa2b926167a788a6b6dbc033 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 3 Feb 2021 17:00:20 -0500
Subject: [PATCH 071/244] =?UTF-8?q?[=F0=9F=8D=92][libc++]=20Fix=20libcxx?=
 =?UTF-8?q?=20build=20on=2032bit=20architectures=20with=2064bit=20time=5Ft?=
 =?UTF-8?q?=20defaults=20e.g.=20riscv32?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch by Khem Raj.

(cherry pick of commit 85b9c5ccc172a1e61c7ecaaec4752587cb6f1e26)

Differential Revision: https://reviews.llvm.org/D96062
---
 libcxx/src/atomic.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index 6b73ed771cd1..9ae1fb5199bf 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -19,6 +19,12 @@
 #include <linux/futex.h>
 #include <sys/syscall.h>
 
+// libc++ uses SYS_futex as a universal syscall name. However, on 32 bit architectures
+// with a 64 bit time_t, we need to specify SYS_futex_time64.
+#if !defined(SYS_futex) && defined(SYS_futex_time64)
+# define SYS_futex SYS_futex_time64
+#endif
+
 #else // <- Add other operating systems here
 
 // Baseline needs no new headers

From d7d818c3615e4ff6bb283df0c1ddbb2b2cd50075 Mon Sep 17 00:00:00 2001
From: Walter Erquinigo <a20012251@gmail.com>
Date: Wed, 27 Jan 2021 13:02:45 -0800
Subject: [PATCH 072/244] Fix runInTerminal failures on Windows

stella.stamenova reported failures on Windows for this test in https://reviews.llvm.org/D93951.

I'm fixing the macro definitions and disabling the tests for Python
versions lower than 3.7. I'll figure out the actual issue with
Python 3.6 once the buildbots are green again.

(cherry picked from commit ab5591e1d8f5abcfa9e75193d3e8a29087b61425)
---
 .../runInTerminal/TestVSCode_runInTerminal.py | 34 +++++++++++++++----
 lldb/tools/lldb-vscode/FifoFiles.cpp          | 10 +++---
 lldb/tools/lldb-vscode/FifoFiles.h            |  1 +
 lldb/tools/lldb-vscode/lldb-vscode.cpp        |  4 +--
 4 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py
index 055b5a5bed87..047cc317596f 100644
--- a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py
+++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py
@@ -33,20 +33,30 @@ def readErrorMessage(self, fifo_file):
         with open(fifo_file, "r") as file:
             return file.readline()
 
+    def isTestSupported(self):
+        """Return True only on Python >= 3.7 with a Release-sized lldb-vscode binary."""
+        # For some strange reason, this test fails on python3.6
+        if sys.version_info < (3, 7):
+            return False
+        try:
+            # We skip this test for debug builds because it takes too long parsing lldb's own
+            # debug info. Release builds are fine. The size of the lldb-vscode binary is a
+            # decent proxy for a quick detection: far less than 1 MB in Release builds.
+            return os.path.getsize(os.environ["LLDBVSCODE_EXEC"]) < 1000000
+        except Exception:
+            # LLDBVSCODE_EXEC is unset or the binary could not be inspected.
+            return False
+
     @skipIfWindows
     @skipIfRemote
     @skipIf(archs=no_match(['x86_64']))
     def test_runInTerminal(self):
+        if not self.isTestSupported():
+            return
         '''
             Tests the "runInTerminal" reverse request. It makes sure that the IDE can
             launch the inferior with the correct environment variables and arguments.
         '''
-        if "debug" in str(os.environ["LLDBVSCODE_EXEC"]).lower():
-            # We skip this test for debug builds because it takes too long parsing lldb's own
-            # debug info. Release builds are fine.
-            # Checking this environment variable seems to be a decent proxy for a quick
-            # detection
-            return
         program = self.getBuildArtifact("a.out")
         source = 'main.c'
         self.build_and_launch(
@@ -77,6 +87,8 @@ def test_runInTerminal(self):
     @skipIfRemote
     @skipIf(archs=no_match(['x86_64']))
     def test_runInTerminalInvalidTarget(self):
+        if not self.isTestSupported():
+            return
         self.build_and_create_debug_adaptor()
         response = self.launch(
             "INVALIDPROGRAM", stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"], expectFailure=True)
@@ -88,6 +100,8 @@ def test_runInTerminalInvalidTarget(self):
     @skipIfRemote
     @skipIf(archs=no_match(['x86_64']))
     def test_missingArgInRunInTerminalLauncher(self):
+        if not self.isTestSupported():
+            return
         proc = subprocess.run([self.lldbVSCodeExec,  "--launch-target", "INVALIDPROGRAM"],
             capture_output=True, universal_newlines=True)
         self.assertTrue(proc.returncode != 0)
@@ -97,6 +111,8 @@ def test_missingArgInRunInTerminalLauncher(self):
     @skipIfRemote
     @skipIf(archs=no_match(['x86_64']))
     def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self):
+        if not self.isTestSupported():
+            return
         comm_file = os.path.join(self.getBuildDir(), "comm-file")
         os.mkfifo(comm_file)
 
@@ -115,6 +131,8 @@ def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self):
     @skipIfRemote
     @skipIf(archs=no_match(['x86_64']))
     def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self):
+        if not self.isTestSupported():
+            return
         comm_file = os.path.join(self.getBuildDir(), "comm-file")
         os.mkfifo(comm_file)
 
@@ -132,6 +150,8 @@ def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self):
     @skipIfRemote
     @skipIf(archs=no_match(['x86_64']))
     def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self):
+        if not self.isTestSupported():
+            return
         comm_file = os.path.join(self.getBuildDir(), "comm-file")
         os.mkfifo(comm_file)
 
@@ -150,6 +170,8 @@ def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self):
     @skipIfRemote
     @skipIf(archs=no_match(['x86_64']))
     def test_NonAttachedRunInTerminalLauncher(self):
+        if not self.isTestSupported():
+            return
         comm_file = os.path.join(self.getBuildDir(), "comm-file")
         os.mkfifo(comm_file)
 
diff --git a/lldb/tools/lldb-vscode/FifoFiles.cpp b/lldb/tools/lldb-vscode/FifoFiles.cpp
index b69970ec0168..0a36c87d4a94 100644
--- a/lldb/tools/lldb-vscode/FifoFiles.cpp
+++ b/lldb/tools/lldb-vscode/FifoFiles.cpp
@@ -6,7 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if !defined(WIN32)
+#include "FifoFiles.h"
+
+#if LLVM_ON_UNIX
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -21,8 +23,6 @@
 
 #include "lldb/lldb-defines.h"
 
-#include "FifoFiles.h"
-
 using namespace llvm;
 
 namespace lldb_vscode {
@@ -30,13 +30,13 @@ namespace lldb_vscode {
 FifoFile::FifoFile(StringRef path) : m_path(path) {}
 
 FifoFile::~FifoFile() {
-#if !defined(WIN32)
+#if LLVM_ON_UNIX
   unlink(m_path.c_str());
 #endif
 };
 
 Expected<std::shared_ptr<FifoFile>> CreateFifoFile(StringRef path) {
-#if defined(WIN32)
+#if !LLVM_ON_UNIX
   return createStringError(inconvertibleErrorCode(), "Unimplemented");
 #else
   if (int err = mkfifo(path.data(), 0600))
diff --git a/lldb/tools/lldb-vscode/FifoFiles.h b/lldb/tools/lldb-vscode/FifoFiles.h
index 891b6f574601..f186f65e86c4 100644
--- a/lldb/tools/lldb-vscode/FifoFiles.h
+++ b/lldb/tools/lldb-vscode/FifoFiles.h
@@ -9,6 +9,7 @@
 #ifndef LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H
 #define LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H
 
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/Error.h"
 
 #include "JSONUtils.h"
diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp
index c581b9b4a9a0..69eb2e70aa6d 100644
--- a/lldb/tools/lldb-vscode/lldb-vscode.cpp
+++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp
@@ -3002,8 +3002,8 @@ static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) {
 // emitted to the debug adaptor.
 void LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg,
                                llvm::StringRef comm_file, char *argv[]) {
-#if defined(WIN_32)
-  llvm::errs() << "runInTerminal is not supported on Windows\n";
+#if !LLVM_ON_UNIX
+  llvm::errs() << "runInTerminal is only supported on POSIX systems\n";
   exit(EXIT_FAILURE);
 #else
   RunInTerminalLauncherCommChannel comm_channel(comm_file);

From 27aff2aa2ade9d78d0081445eadacd5b5006143e Mon Sep 17 00:00:00 2001
From: Walter Erquinigo <a20012251@gmail.com>
Date: Thu, 28 Jan 2021 09:24:30 -0800
Subject: [PATCH 073/244] Fix lldb-vscode builds on Windows targeting POSIX

@stella.stamenova found out that lldb-vscode's Win32 macros were failing
when building on Windows targeting POSIX platforms.

I'm replacing these macros with LLVM_ON_UNIX, which should be more
accurate.

(cherry picked from commit 0bca9a7ce2eeaa9f1d732ffbc17769560a2b236e)
---
 lldb/tools/lldb-vscode/IOStream.cpp      |  6 +++---
 lldb/tools/lldb-vscode/IOStream.h        |  4 +++-
 lldb/tools/lldb-vscode/RunInTerminal.cpp |  6 +++---
 lldb/tools/lldb-vscode/VSCode.cpp        |  4 ++--
 lldb/tools/lldb-vscode/VSCode.h          |  2 ++
 lldb/tools/lldb-vscode/lldb-vscode.cpp   | 11 ++++++-----
 6 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp
index 4b11b90b4c2e..fdbfb554aedb 100644
--- a/lldb/tools/lldb-vscode/IOStream.cpp
+++ b/lldb/tools/lldb-vscode/IOStream.cpp
@@ -8,7 +8,7 @@
 
 #include "IOStream.h"
 
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
 #include <io.h>
 #else
 #include <netinet/in.h>
@@ -33,7 +33,7 @@ StreamDescriptor::~StreamDescriptor() {
     return;
 
   if (m_is_socket)
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
     ::closesocket(m_socket);
 #else
     ::close(m_socket);
@@ -108,7 +108,7 @@ bool InputStream::read_full(std::ofstream *log, size_t length,
     }
     if (bytes_read < 0) {
       int reason = 0;
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
       if (descriptor.m_is_socket)
         reason = WSAGetLastError();
       else
diff --git a/lldb/tools/lldb-vscode/IOStream.h b/lldb/tools/lldb-vscode/IOStream.h
index 603ae9adcc2a..1ec7ac3ed0f9 100644
--- a/lldb/tools/lldb-vscode/IOStream.h
+++ b/lldb/tools/lldb-vscode/IOStream.h
@@ -9,7 +9,9 @@
 #ifndef LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H
 #define LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H
 
-#if defined(_WIN32)
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
+
+#if !LLVM_ON_UNIX
 // We need to #define NOMINMAX in order to skip `min()` and `max()` macro
 // definitions that conflict with other system headers.
 // We also need to #undef GetObject (which is defined to GetObjectW) because
diff --git a/lldb/tools/lldb-vscode/RunInTerminal.cpp b/lldb/tools/lldb-vscode/RunInTerminal.cpp
index 4db2806924ca..29edf5ca381d 100644
--- a/lldb/tools/lldb-vscode/RunInTerminal.cpp
+++ b/lldb/tools/lldb-vscode/RunInTerminal.cpp
@@ -6,7 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if !defined(WIN32)
+#include "RunInTerminal.h"
+
+#if LLVM_ON_UNIX
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -21,8 +23,6 @@
 
 #include "lldb/lldb-defines.h"
 
-#include "RunInTerminal.h"
-
 using namespace llvm;
 
 namespace lldb_vscode {
diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp
index e9fdc17f4147..4d0e281c1b8d 100644
--- a/lldb/tools/lldb-vscode/VSCode.cpp
+++ b/lldb/tools/lldb-vscode/VSCode.cpp
@@ -14,7 +14,7 @@
 #include "VSCode.h"
 #include "llvm/Support/FormatVariadic.h"
 
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
 #define NOMINMAX
 #include <fcntl.h>
 #include <io.h>
@@ -41,7 +41,7 @@ VSCode::VSCode()
       stop_at_entry(false), is_attach(false),
       reverse_request_seq(0), waiting_for_run_in_terminal(false) {
   const char *log_file_path = getenv("LLDBVSCODE_LOG");
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
   // Windows opens stdout and stdin in text mode which converts \n to 13,10
   // while the value is just 10 on Darwin/Linux. Setting the file mode to binary
   // fixes this.
diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h
index 8e7dfc078934..a2e1cac8ecf9 100644
--- a/lldb/tools/lldb-vscode/VSCode.h
+++ b/lldb/tools/lldb-vscode/VSCode.h
@@ -9,6 +9,8 @@
 #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H
 #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H
 
+#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
+
 #include <condition_variable>
 #include <iosfwd>
 #include <map>
diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp
index 69eb2e70aa6d..b7f39cbb1cb5 100644
--- a/lldb/tools/lldb-vscode/lldb-vscode.cpp
+++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "VSCode.h"
+
 #include <assert.h>
 #include <limits.h>
 #include <stdarg.h>
@@ -14,7 +16,7 @@
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
 // We need to #define NOMINMAX in order to skip `min()` and `max()` macro
 // definitions that conflict with other system headers.
 // We also need to #undef GetObject (which is defined to GetObjectW) because
@@ -52,9 +54,8 @@
 
 #include "JSONUtils.h"
 #include "LLDBUtils.h"
-#include "VSCode.h"
 
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
 #ifndef PATH_MAX
 #define PATH_MAX MAX_PATH
 #endif
@@ -131,7 +132,7 @@ SOCKET AcceptConnection(int portno) {
           *g_vsc.log << "error: accept (" << strerror(errno) << ")"
                      << std::endl;
     }
-#if defined(_WIN32)
+#if !LLVM_ON_UNIX
     closesocket(sockfd);
 #else
     close(sockfd);
@@ -3084,7 +3085,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-#if !defined(_WIN32)
+#if LLVM_ON_UNIX
   if (input_args.hasArg(OPT_wait_for_debugger)) {
     printf("Paused waiting for debugger to attach (pid = %i)...\n", getpid());
     pause();

From 1cb6551edb94eea1fc087b346b1e8d13775dc692 Mon Sep 17 00:00:00 2001
From: Walter Erquinigo <a20012251@gmail.com>
Date: Thu, 4 Feb 2021 10:07:07 -0800
Subject: [PATCH 074/244] [lldb-vscode] correctly use Windows macros

@mstorsjo found a mistake that I made when trying to fix some Windows
compilation errors encountered by @stella.stamenova.

I was incorrectly using the LLVM_ON_UNIX macro. In any case, proper use
of

  #if defined(_WIN32)

should be the actual fix.

Differential Revision: https://reviews.llvm.org/D96060

(cherry picked from commit 36496cc2992d6fa26e6024971efcfc7d15f69888)
---
 lldb/tools/lldb-vscode/FifoFiles.cpp     |  6 +++---
 lldb/tools/lldb-vscode/IOStream.cpp      |  6 +++---
 lldb/tools/lldb-vscode/IOStream.h        |  2 +-
 lldb/tools/lldb-vscode/RunInTerminal.cpp |  2 +-
 lldb/tools/lldb-vscode/VSCode.cpp        |  4 ++--
 lldb/tools/lldb-vscode/lldb-vscode.cpp   | 10 +++++-----
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/lldb/tools/lldb-vscode/FifoFiles.cpp b/lldb/tools/lldb-vscode/FifoFiles.cpp
index 0a36c87d4a94..4b14fb16f96c 100644
--- a/lldb/tools/lldb-vscode/FifoFiles.cpp
+++ b/lldb/tools/lldb-vscode/FifoFiles.cpp
@@ -8,7 +8,7 @@
 
 #include "FifoFiles.h"
 
-#if LLVM_ON_UNIX
+#if !defined(_WIN32)
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -30,13 +30,13 @@ namespace lldb_vscode {
 FifoFile::FifoFile(StringRef path) : m_path(path) {}
 
 FifoFile::~FifoFile() {
-#if LLVM_ON_UNIX
+#if !defined(_WIN32)
   unlink(m_path.c_str());
 #endif
 };
 
 Expected<std::shared_ptr<FifoFile>> CreateFifoFile(StringRef path) {
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
   return createStringError(inconvertibleErrorCode(), "Unimplemented");
 #else
   if (int err = mkfifo(path.data(), 0600))
diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp
index fdbfb554aedb..cd22d906c14c 100644
--- a/lldb/tools/lldb-vscode/IOStream.cpp
+++ b/lldb/tools/lldb-vscode/IOStream.cpp
@@ -8,7 +8,7 @@
 
 #include "IOStream.h"
 
-#if !LLVM_ON_UNIX
+#if defined(_WIN32) 
 #include <io.h>
 #else
 #include <netinet/in.h>
@@ -33,7 +33,7 @@ StreamDescriptor::~StreamDescriptor() {
     return;
 
   if (m_is_socket)
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
     ::closesocket(m_socket);
 #else
     ::close(m_socket);
@@ -108,7 +108,7 @@ bool InputStream::read_full(std::ofstream *log, size_t length,
     }
     if (bytes_read < 0) {
       int reason = 0;
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
       if (descriptor.m_is_socket)
         reason = WSAGetLastError();
       else
diff --git a/lldb/tools/lldb-vscode/IOStream.h b/lldb/tools/lldb-vscode/IOStream.h
index 1ec7ac3ed0f9..0eb9b6fefb0d 100644
--- a/lldb/tools/lldb-vscode/IOStream.h
+++ b/lldb/tools/lldb-vscode/IOStream.h
@@ -11,7 +11,7 @@
 
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
 // We need to #define NOMINMAX in order to skip `min()` and `max()` macro
 // definitions that conflict with other system headers.
 // We also need to #undef GetObject (which is defined to GetObjectW) because
diff --git a/lldb/tools/lldb-vscode/RunInTerminal.cpp b/lldb/tools/lldb-vscode/RunInTerminal.cpp
index 29edf5ca381d..2126563d9e96 100644
--- a/lldb/tools/lldb-vscode/RunInTerminal.cpp
+++ b/lldb/tools/lldb-vscode/RunInTerminal.cpp
@@ -8,7 +8,7 @@
 
 #include "RunInTerminal.h"
 
-#if LLVM_ON_UNIX
+#if !defined(_WIN32)
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp
index 4d0e281c1b8d..e9fdc17f4147 100644
--- a/lldb/tools/lldb-vscode/VSCode.cpp
+++ b/lldb/tools/lldb-vscode/VSCode.cpp
@@ -14,7 +14,7 @@
 #include "VSCode.h"
 #include "llvm/Support/FormatVariadic.h"
 
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
 #define NOMINMAX
 #include <fcntl.h>
 #include <io.h>
@@ -41,7 +41,7 @@ VSCode::VSCode()
       stop_at_entry(false), is_attach(false),
       reverse_request_seq(0), waiting_for_run_in_terminal(false) {
   const char *log_file_path = getenv("LLDBVSCODE_LOG");
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
   // Windows opens stdout and stdin in text mode which converts \n to 13,10
   // while the value is just 10 on Darwin/Linux. Setting the file mode to binary
   // fixes this.
diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp
index b7f39cbb1cb5..9469690cd7db 100644
--- a/lldb/tools/lldb-vscode/lldb-vscode.cpp
+++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp
@@ -16,7 +16,7 @@
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
 // We need to #define NOMINMAX in order to skip `min()` and `max()` macro
 // definitions that conflict with other system headers.
 // We also need to #undef GetObject (which is defined to GetObjectW) because
@@ -55,7 +55,7 @@
 #include "JSONUtils.h"
 #include "LLDBUtils.h"
 
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
 #ifndef PATH_MAX
 #define PATH_MAX MAX_PATH
 #endif
@@ -132,7 +132,7 @@ SOCKET AcceptConnection(int portno) {
           *g_vsc.log << "error: accept (" << strerror(errno) << ")"
                      << std::endl;
     }
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
     closesocket(sockfd);
 #else
     close(sockfd);
@@ -3003,7 +3003,7 @@ static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) {
 // emitted to the debug adaptor.
 void LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg,
                                llvm::StringRef comm_file, char *argv[]) {
-#if !LLVM_ON_UNIX
+#if defined(_WIN32)
   llvm::errs() << "runInTerminal is only supported on POSIX systems\n";
   exit(EXIT_FAILURE);
 #else
@@ -3085,7 +3085,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-#if LLVM_ON_UNIX
+#if !defined(_WIN32)
   if (input_args.hasArg(OPT_wait_for_debugger)) {
     printf("Paused waiting for debugger to attach (pid = %i)...\n", getpid());
     pause();

From c9fb4a947e32abfaa73b0b91a58ef71c73316322 Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu@google.com>
Date: Thu, 4 Feb 2021 17:00:09 -0800
Subject: [PATCH 075/244] [AST] Update LVal before evaluating lambda decl
 fields.

Differential Revision: https://reviews.llvm.org/D96092

(cherry picked from commit 96fb49c3ff8e08680127ddd4ec45a0e6c199243b)
---
 clang/lib/AST/ExprConstant.cpp                   |  8 +++++++-
 clang/test/SemaCXX/constant-expression-cxx2a.cpp | 10 ++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 56181bbe1166..1c4caa2c1fc0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -10009,6 +10009,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
   auto *CaptureInitIt = E->capture_init_begin();
   const LambdaCapture *CaptureIt = ClosureClass->captures_begin();
   bool Success = true;
+  const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(ClosureClass);
   for (const auto *Field : ClosureClass->fields()) {
     assert(CaptureInitIt != E->capture_init_end());
     // Get the initializer for this field
@@ -10019,8 +10020,13 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
     if (!CurFieldInit)
       return Error(E);
 
+    LValue Subobject = This;
+
+    if (!HandleLValueMember(Info, E, Subobject, Field, &Layout))
+      return false;
+
     APValue &FieldVal = Result.getStructField(Field->getFieldIndex());
-    if (!EvaluateInPlace(FieldVal, Info, This, CurFieldInit)) {
+    if (!EvaluateInPlace(FieldVal, Info, Subobject, CurFieldInit)) {
       if (!Info.keepEvaluatingAfterFailure())
         return false;
       Success = false;
diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
index 4adadc9988ab..86020a09db44 100644
--- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
@@ -1437,3 +1437,13 @@ constexpr bool destroy_at_test() {
   return true;
 }
 static_assert(destroy_at_test());
+
+namespace PR48582 { // two init-captures of a lambda must not compare equal by address
+  struct S {
+    void *p = this; // every S, including copies, records its own address
+    constexpr S() {}
+    constexpr S(const S&) {}
+  };
+  constexpr bool b = [a = S(), b = S()] { return a.p == b.p; }();
+  static_assert(!b);
+}

From 8153dee37272a73b1ed74ac1bc12422fac8ef033 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Mon, 8 Feb 2021 17:58:05 -0800
Subject: [PATCH 076/244] PR48606: The lifetime of a constexpr heap allocation
 always started during the same evaluation.

It looks like the only case for which this matters is determining
whether mutable subobjects of a heap allocation can be modified during
constant evaluation.

(cherry picked from commit 21e8bb83253e1a2f4b6fad9b53cafe8c530a38e2)
---
 clang/lib/AST/ExprConstant.cpp                |  4 +--
 .../test/SemaCXX/cxx2a-constexpr-dynalloc.cpp | 34 +++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1c4caa2c1fc0..cd2b5141ebe8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -3497,8 +3497,8 @@ static bool diagnoseMutableFields(EvalInfo &Info, const Expr *E, AccessKinds AK,
 static bool lifetimeStartedInEvaluation(EvalInfo &Info,
                                         APValue::LValueBase Base,
                                         bool MutableSubobject = false) {
-  // A temporary we created.
-  if (Base.getCallIndex())
+  // A temporary or transient heap allocation we created.
+  if (Base.getCallIndex() || Base.is<DynamicAllocLValue>())
     return true;
 
   switch (Info.IsEvaluatingDecl) {
diff --git a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp
index 3647526ff0af..097ca00640e9 100644
--- a/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp
+++ b/clang/test/SemaCXX/cxx2a-constexpr-dynalloc.cpp
@@ -176,3 +176,37 @@ constexpr bool construct_after_lifetime_2() {
   return true;
 }
 static_assert(construct_after_lifetime_2()); // expected-error {{}} expected-note {{in call}}
+
+namespace PR48606 {
+  struct A { mutable int n = 0; }; // note the mutable member
+
+  constexpr bool f() { // automatic object: destroy, then re-create in place
+    A a;
+    A *p = &a;
+    p->~A();
+    std::construct_at<A>(p);
+    return true;
+  }
+  static_assert(f());
+
+  constexpr bool g() { // new-expression storage: same cycle, then delete
+    A *p = new A;
+    p->~A();
+    std::construct_at<A>(p);
+    delete p;
+    return true;
+  }
+  static_assert(g());
+
+  constexpr bool h() { // std::allocator storage: two construct/destroy cycles
+    std::allocator<A> alloc;
+    A *p = alloc.allocate(1);
+    std::construct_at<A>(p);
+    p->~A();
+    std::construct_at<A>(p);
+    p->~A();
+    alloc.deallocate(p);
+    return true;
+  }
+  static_assert(h());
+}

From b46924ee5afe234526220c29a497794bf65f8f7f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 27 Jan 2021 10:14:54 +0000
Subject: [PATCH 077/244] Fix "not all control paths return a value" warning.
 NFCI.

---
 clang/lib/Basic/ProfileList.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp
index 56bc37a79301..2cb05c1c3c07 100644
--- a/clang/lib/Basic/ProfileList.cpp
+++ b/clang/lib/Basic/ProfileList.cpp
@@ -82,6 +82,7 @@ static StringRef getSectionName(CodeGenOptions::ProfileInstrKind Kind) {
   case CodeGenOptions::ProfileCSIRInstr:
     return "csllvm";
   }
+  llvm_unreachable("Unhandled CodeGenOptions::ProfileInstrKind enum");
 }
 
 llvm::Optional<bool>

From 8d20c14a8a3dd0f83d4066f957ba4c006d29942b Mon Sep 17 00:00:00 2001
From: Nathan James <n.james93@hotmail.co.uk>
Date: Fri, 12 Feb 2021 16:55:44 +0000
Subject: [PATCH 078/244] [clangd] Fix clang tidy provider when multiple config
 files exist in directory tree

Currently the clang-tidy provider searches from the root directory up to the target directory; this is the opposite of how clang-tidy searches for config files.
The result is that .clang-tidy files are ignored in any subdirectory of a directory containing a .clang-tidy file.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D96204

(cherry picked from commit ba3ea9c60f0f259f0ccc47e47daf8253a5885531)
---
 clang-tools-extra/clangd/TidyProvider.cpp     |  2 +-
 .../clangd/unittests/CMakeLists.txt           |  1 +
 .../clangd/unittests/TidyProviderTests.cpp    | 60 +++++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 clang-tools-extra/clangd/unittests/TidyProviderTests.cpp

diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp
index c26c59fd347d..bcf1cd5a6183 100644
--- a/clang-tools-extra/clangd/TidyProvider.cpp
+++ b/clang-tools-extra/clangd/TidyProvider.cpp
@@ -106,7 +106,7 @@ class DotClangTidyTree {
     llvm::SmallVector<DotClangTidyCache *> Caches;
     {
       std::lock_guard<std::mutex> Lock(Mu);
-      for (auto I = path::begin(Parent), E = path::end(Parent); I != E; ++I) {
+      for (auto I = path::rbegin(Parent), E = path::rend(Parent); I != E; ++I) {
         assert(I->end() >= Parent.begin() && I->end() <= Parent.end() &&
                "Canonical path components should be substrings");
         llvm::StringRef Ancestor(Parent.begin(), I->end() - Parent.begin());
diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index adf4ac827cce..f4d364720eaf 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -93,6 +93,7 @@ add_unittest(ClangdUnitTests ClangdTests
   TestIndex.cpp
   TestTU.cpp
   TestWorkspace.cpp
+  TidyProviderTests.cpp
   TypeHierarchyTests.cpp
   URITests.cpp
   XRefsTests.cpp
diff --git a/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp b/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp
new file mode 100644
index 000000000000..a16c87456a1a
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/TidyProviderTests.cpp
@@ -0,0 +1,60 @@
+//===-- TidyProviderTests.cpp - Clang tidy configuration provider tests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestFS.h"
+#include "TidyProvider.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+TEST(TidyProvider, NestedDirectories) {
+  MockFS FS;
+  FS.Files[testPath(".clang-tidy")] = R"yaml(
+  Checks: 'llvm-*'
+  CheckOptions:
+    - key: TestKey
+      value: 1
+)yaml";
+  FS.Files[testPath("sub1/.clang-tidy")] = R"yaml(
+  Checks: 'misc-*'
+  CheckOptions:
+    - key: TestKey
+      value: 2
+)yaml";
+  FS.Files[testPath("sub1/sub2/.clang-tidy")] = R"yaml(
+  Checks: 'bugprone-*'
+  CheckOptions:
+    - key: TestKey
+      value: 3
+  InheritParentConfig: true
+)yaml";
+
+  TidyProvider Provider = provideClangTidyFiles(FS);
+
+  auto BaseOptions = getTidyOptionsForFile(Provider, testPath("File.cpp"));
+  ASSERT_TRUE(BaseOptions.Checks.hasValue());
+  EXPECT_EQ(*BaseOptions.Checks, "llvm-*");
+  EXPECT_EQ(BaseOptions.CheckOptions.lookup("TestKey").Value, "1");
+
+  auto Sub1Options = getTidyOptionsForFile(Provider, testPath("sub1/File.cpp"));
+  ASSERT_TRUE(Sub1Options.Checks.hasValue());
+  EXPECT_EQ(*Sub1Options.Checks, "misc-*");
+  EXPECT_EQ(Sub1Options.CheckOptions.lookup("TestKey").Value, "2");
+
+  auto Sub2Options =
+      getTidyOptionsForFile(Provider, testPath("sub1/sub2/File.cpp"));
+  ASSERT_TRUE(Sub2Options.Checks.hasValue());
+  EXPECT_EQ(*Sub2Options.Checks, "misc-*,bugprone-*");
+  EXPECT_EQ(Sub2Options.CheckOptions.lookup("TestKey").Value, "3");
+}
+} // namespace
+} // namespace clangd
+} // namespace clang

From 6604c3050948d602ef24b3d3efbf9f4410494833 Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Tue, 2 Feb 2021 14:21:33 -0800
Subject: [PATCH 079/244] [GlobalISel] Check if branches use the same MBB in
 matchOptBrCondByInvertingCond

If the G_BR and G_BRCOND in this combine branch to the same MBB, the combine
will loop forever. Don't allow that to happen.

Differential Revision: https://reviews.llvm.org/D95895

(cherry picked from commit 02d4b365bf4f8c2cb56e5612902f6c3bb4316493)
---
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  9 +++----
 .../GlobalISel/prelegalizercombiner-br.mir    | 24 +++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index df0219fcfa64..a9353bdfb780 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -968,10 +968,11 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
     return false;
 
-  // Check that the next block is the conditional branch target.
-  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
-    return false;
-  return true;
+  // Check that the next block is the conditional branch target. Also make sure
+  // that it isn't the same as the G_BR's target (otherwise, this will loop.)
+  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
+  return BrCondTarget != MI.getOperand(0).getMBB() &&
+         MBB->isLayoutSuccessor(BrCondTarget);
 }
 
 void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
index 0631ff89ade0..0647de44c4b8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-br.mir
@@ -29,6 +29,7 @@
     ret i32 %retval.0
   }
 
+  define void @dont_combine_same_block() { ret void }
 
 ...
 ---
@@ -87,3 +88,26 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+---
+name:            dont_combine_same_block
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: dont_combine_same_block
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $w0, $w1
+  ; CHECK:   %cond:_(s1) = G_IMPLICIT_DEF
+  ; CHECK:   G_BRCOND %cond(s1), %bb.1
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    liveins: $w0, $w1
+    %cond:_(s1) = G_IMPLICIT_DEF
+
+    ; The G_BRCOND and G_BR have the same target here. Don't change anything.
+    G_BRCOND %cond(s1), %bb.1
+    G_BR %bb.1
+  bb.1:
+    RET_ReallyLR
+...

From 04cb6b5ea8bd2b52e3d11f4cb970fd2d144eee6a Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Mon, 8 Feb 2021 17:32:52 -0800
Subject: [PATCH 080/244] PR48587: is_constant_evaluated() should not evaluate
 to true during a variable's destruction if it didn't do so during
 construction.

The standard doesn't give any guidance as to what to do here, but this
approach seems reasonable and conservative, and has been proposed to the
standard committee.

(cherry picked from commit c945dc4a5023d6a17d11fcda76509b94b36e34fc)
---
 clang/lib/AST/ExprConstant.cpp                | 19 +++-
 .../builtin-is-constant-evaluated.cpp         | 92 +++++++++++++++++++
 2 files changed, 106 insertions(+), 5 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index cd2b5141ebe8..1bdad771a923 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14792,11 +14792,14 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx,
 
 static bool EvaluateDestruction(const ASTContext &Ctx, APValue::LValueBase Base,
                                 APValue DestroyedValue, QualType Type,
-                                SourceLocation Loc, Expr::EvalStatus &EStatus) {
-  EvalInfo Info(Ctx, EStatus, EvalInfo::EM_ConstantExpression);
+                                SourceLocation Loc, Expr::EvalStatus &EStatus,
+                                bool IsConstantDestruction) {
+  EvalInfo Info(Ctx, EStatus,
+                IsConstantDestruction ? EvalInfo::EM_ConstantExpression
+                                      : EvalInfo::EM_ConstantFold);
   Info.setEvaluatingDecl(Base, DestroyedValue,
                          EvalInfo::EvaluatingDeclKind::Dtor);
-  Info.InConstantContext = true;
+  Info.InConstantContext = IsConstantDestruction;
 
   LValue LVal;
   LVal.set(Base);
@@ -14850,7 +14853,8 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx,
   // If this is a class template argument, it's required to have constant
   // destruction too.
   if (Kind == ConstantExprKind::ClassTemplateArgument &&
-      (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result) ||
+      (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result,
+                            true) ||
        Result.HasSideEffects)) {
     // FIXME: Prefix a note to indicate that the problem is lack of constant
     // destruction.
@@ -14916,6 +14920,10 @@ bool VarDecl::evaluateDestruction(
   Expr::EvalStatus EStatus;
   EStatus.Diag = &Notes;
 
+  // Only treat the destruction as constant destruction if we formally have
+  // constant initialization (or are usable in a constant expression).
+  bool IsConstantDestruction = hasConstantInitialization();
+
   // Make a copy of the value for the destructor to mutate, if we know it.
   // Otherwise, treat the value as default-initialized; if the destructor works
   // anyway, then the destruction is constant (and must be essentially empty).
@@ -14926,7 +14934,8 @@ bool VarDecl::evaluateDestruction(
     return false;
 
   if (!EvaluateDestruction(getASTContext(), this, std::move(DestroyedValue),
-                           getType(), getLocation(), EStatus) ||
+                           getType(), getLocation(), EStatus,
+                           IsConstantDestruction) ||
       EStatus.HasSideEffects)
     return false;
 
diff --git a/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp b/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp
index 967c83496ab9..d30fefe55b4f 100644
--- a/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp
+++ b/clang/test/CodeGenCXX/builtin-is-constant-evaluated.cpp
@@ -4,6 +4,7 @@
 // RUN: FileCheck -check-prefix=CHECK-DYN -input-file=%t.ll %s
 // RUN: FileCheck -check-prefix=CHECK-ARR -input-file=%t.ll %s
 // RUN: FileCheck -check-prefix=CHECK-FOLD -input-file=%t.ll %s
+// RUN: FileCheck -check-prefix=CHECK-DTOR -input-file=%t.ll %s
 
 using size_t = decltype(sizeof(int));
 
@@ -131,3 +132,94 @@ void test_ref_to_static_var() {
   // CHECK-FOLD: store i32* @_ZZ22test_ref_to_static_varvE10i_constant, i32** %r,
   int &r = __builtin_is_constant_evaluated() ? i_constant : i_non_constant;
 }
+
+int not_constexpr;
+
+// __builtin_is_constant_evaluated() should never evaluate to true during
+// destruction if it would not have done so during construction.
+//
+// FIXME: The standard doesn't say that it should ever return true when
+// evaluating a destructor call, even for a constexpr variable. That seems
+// obviously wrong.
+struct DestructorBCE {
+  int n;
+  constexpr DestructorBCE(int n) : n(n) {}
+  constexpr ~DestructorBCE() {
+    if (!__builtin_is_constant_evaluated())
+      not_constexpr = 1;
+  }
+};
+
+// CHECK-DTOR-NOT: @_ZN13DestructorBCED{{.*}}@global_dtor_bce_1
+DestructorBCE global_dtor_bce_1(101);
+
+// CHECK-DTOR: load i32, i32* @not_constexpr
+// CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}} @global_dtor_bce_2, i32
+// CHECK-DTOR: atexit{{.*}} @_ZN13DestructorBCED{{.*}} @global_dtor_bce_2
+// CHECK-DTOR: }
+DestructorBCE global_dtor_bce_2(not_constexpr);
+
+// CHECK-DTOR-NOT: @_ZN13DestructorBCED{{.*}}@global_dtor_bce_3
+constexpr DestructorBCE global_dtor_bce_3(103);
+
+// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_1v(
+void test_dtor_bce_1() {
+  // Variable is neither constant initialized (because it has automatic storage
+  // duration) nor usable in constant expressions, so BCE should not return
+  // true during destruction. It would be OK if we replaced the constructor
+  // call with a direct store, but we should emit the destructor call.
+
+  // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}}, i32 201)
+  DestructorBCE local(201);
+  // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCED
+  // CHECK-DTOR: }
+}
+
+// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_2v(
+void test_dtor_bce_2() {
+  // Non-constant init => BCE is false in destructor.
+
+  // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}}
+  DestructorBCE local(not_constexpr);
+  // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCED
+  // CHECK-DTOR: }
+}
+
+// CHECK-DTOR-LABEL: define {{.*}} @_Z15test_dtor_bce_3v(
+void test_dtor_bce_3() {
+  // Should never call dtor for a constexpr variable.
+
+  // CHECK-DTOR-NOT: call {{.*}} @_ZN13DestructorBCEC1Ei(
+  constexpr DestructorBCE local(203);
+  // CHECK-DTOR-NOT: @_ZN13DestructorBCED
+  // CHECK-DTOR: }
+}
+
+// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_1v(
+void test_dtor_bce_static_1() {
+  // Variable is constant initialized, so BCE returns true during constant
+  // destruction.
+
+  // CHECK: store i32 301
+  // CHECK-DTOR-NOT: @_ZN13DestructorBCEC1Ei({{.*}}
+  static DestructorBCE local(301);
+  // CHECK-DTOR-NOT: @_ZN13DestructorBCED
+  // CHECK-DTOR: }
+}
+
+// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_2v(
+void test_dtor_bce_static_2() {
+  // CHECK-DTOR: call {{.*}} @_ZN13DestructorBCEC1Ei({{.*}}
+  static DestructorBCE local(not_constexpr);
+  // CHECK-DTOR: call {{.*}}atexit{{.*}} @_ZN13DestructorBCED
+  // CHECK-DTOR: }
+}
+
+// CHECK-DTOR-LABEL: define {{.*}} @_Z22test_dtor_bce_static_3v(
+void test_dtor_bce_static_3() {
+  // CHECK: store i32 303
+  // CHECK-DTOR-NOT: @_ZN13DestructorBCEC1Ei({{.*}}
+  static constexpr DestructorBCE local(303);
+  // CHECK-DTOR-NOT: @_ZN13DestructorBCED
+  // CHECK-DTOR: }
+}

From 205ecd9b79c6915a85050246c961f167b494df43 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Tue, 9 Feb 2021 06:33:48 -0600
Subject: [PATCH 081/244] [DAGCombine] Do not remove masking argument to
 FP16_TO_FP for some targets

As of commit 284f2bffc9bc5, the DAG Combiner gets rid of the masking of the
input to this node if the mask only keeps the bottom 16 bits. This is because
the underlying library function does not use the high order bits. However, on
PowerPC's ELFv2 ABI, it is the caller that is responsible for clearing the bits
from the register. Therefore, the library implementation of __gnu_h2f_ieee will
return an incorrect result if the bits aren't cleared.

This combine is desired for ARM (and possibly other targets) so this patch adds
a query to Target Lowering to check if this zeroing needs to be kept.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=49092

Differential revision: https://reviews.llvm.org/D96283

(cherry picked from commit a5222aa0858a42660629c410a5b669dee16a4359)
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  4 ++
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  3 ++
 .../PowerPC/handle-f16-storage-type.ll        |  4 ++
 llvm/test/CodeGen/PowerPC/pr48519.ll          |  2 +
 llvm/test/CodeGen/PowerPC/pr49092.ll          | 39 +++++++++++++++++++
 6 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/pr49092.ll

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c3221aac8eea..40115fbd2f15 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2785,6 +2785,10 @@ class TargetLoweringBase {
     return false;
   }
 
+  /// Does this target require the clearing of high-order bits in a register
+  /// passed to the fp16 to fp conversion library function.
+  virtual bool shouldKeepZExtForFP16Conv() const { return false; }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 615bea2a4905..89670d708264 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21174,7 +21174,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   SDValue N0 = N->getOperand(0);
 
   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
-  if (N0->getOpcode() == ISD::AND) {
+  if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 477105bd03ac..0dda2c181572 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -987,6 +987,9 @@ namespace llvm {
     shouldExpandBuildVectorWithShuffles(EVT VT,
                                         unsigned DefinedValues) const override;
 
+    // Keep the zero-extensions for arguments to libcalls.
+    bool shouldKeepZExtForFP16Conv() const override { return true; }
+
     /// createFastISel - This method returns a target-specific FastISel object,
     /// or null if the target does not support "fast" instruction selection.
     FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
index 9977b6b33560..ab19afa2beb5 100644
--- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
+++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll
@@ -1156,6 +1156,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
 ; P8-NEXT:    xscvsxdsp f1, f0
 ; P8-NEXT:    bl __gnu_f2h_ieee
 ; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
 ; P8-NEXT:    xsaddsp f1, f31, f1
@@ -1175,6 +1176,7 @@ define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
 ; CHECK-NEXT:    xscvhpdp f0, f0
 ; CHECK-NEXT:    xscvdphp f1, f1
 ; CHECK-NEXT:    mffprwz r3, f1
+; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f1, r3
 ; CHECK-NEXT:    xscvhpdp f1, f1
 ; CHECK-NEXT:    xsaddsp f1, f0, f1
@@ -1225,6 +1227,7 @@ define half @PR40273(half) #0 {
 ; P8-NEXT:    stdu r1, -32(r1)
 ; P8-NEXT:    bl __gnu_f2h_ieee
 ; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
 ; P8-NEXT:    bl __gnu_h2f_ieee
 ; P8-NEXT:    nop
 ; P8-NEXT:    xxlxor f0, f0, f0
@@ -1245,6 +1248,7 @@ define half @PR40273(half) #0 {
 ; CHECK-NEXT:    xscvdphp f0, f1
 ; CHECK-NEXT:    xxlxor f1, f1, f1
 ; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrlwi r3, r3, 16
 ; CHECK-NEXT:    mtfprwz f0, r3
 ; CHECK-NEXT:    xscvhpdp f0, f0
 ; CHECK-NEXT:    fcmpu cr0, f0, f1
diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index 50970cb185d8..035cc49b93e6 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -22,6 +22,7 @@ define void @julia__typed_vcat_20() #0 {
 ; CHECK-NEXT:    xscvsxdsp f1, f0
 ; CHECK-NEXT:    bl __gnu_f2h_ieee
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    clrldi r3, r3, 48
 ; CHECK-NEXT:    bl __gnu_h2f_ieee
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r30, r30, -1
@@ -46,6 +47,7 @@ define void @julia__typed_vcat_20() #0 {
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P9-NEXT:    xscvdphp f0, f0
 ; CHECK-P9-NEXT:    mffprwz r3, f0
+; CHECK-P9-NEXT:    clrlwi r3, r3, 16
 ; CHECK-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    xscvhpdp f0, f0
diff --git a/llvm/test/CodeGen/PowerPC/pr49092.ll b/llvm/test/CodeGen/PowerPC/pr49092.ll
new file mode 100644
index 000000000000..2fce58418515
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr49092.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
+; RUN:   -check-prefix=CHECK-P9
+
+define dso_local half @test2(i64 %a, i64 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    add r3, r4, r3
+; CHECK-NEXT:    addi r3, r3, 11
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; CHECK-P9-LABEL: test2:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    add r3, r4, r3
+; CHECK-P9-NEXT:    addi r3, r3, 11
+; CHECK-P9-NEXT:    clrlwi r3, r3, 16
+; CHECK-P9-NEXT:    mtfprwz f0, r3
+; CHECK-P9-NEXT:    xscvhpdp f1, f0
+; CHECK-P9-NEXT:    blr
+entry:
+  %add = add i64 %b, %a
+  %0 = trunc i64 %add to i16
+  %conv = add i16 %0, 11
+  %call = bitcast i16 %conv to half
+  ret half %call
+}
+attributes #0 = { nounwind }

From 34cda01e235c549b56ffe30a7b09df0414d56ea0 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Tue, 2 Feb 2021 14:40:52 +0000
Subject: [PATCH 082/244] [RISCV] Fix incorrect RVV sdiv/udiv lowering

Due to a clerical error, the sdiv operation was mapping to vdivu and
udiv to vdiv, when the opposite mapping is the correct one.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D95869

(cherry picked from commit b4106f9c7b8c498d109301ced7bf9aca32027168)
---
 .../Target/RISCV/RISCVInstrInfoVSDPatterns.td |  4 +-
 .../CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll     | 88 +++++++++----------
 .../CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll     | 88 +++++++++----------
 .../CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll    | 88 +++++++++----------
 .../CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll    | 88 +++++++++----------
 5 files changed, 178 insertions(+), 178 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 79a1e6ddc8a2..dee67708bed1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -384,8 +384,8 @@ defm "" : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH">;
 defm "" : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU">;
 
 // 12.11. Vector Integer Divide Instructions
-defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIVU">;
-defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIV">;
+defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU">;
+defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">;
 defm "" : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
 defm "" : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll
index 239151274c4e..bbfc09d1c276 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv32.ll
@@ -5,7 +5,7 @@ define <vscale x 1 x i8> @vdiv_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8
 ; CHECK-LABEL: vdiv_vv_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i8> %va, %vb
   ret <vscale x 1 x i8> %vc
@@ -15,7 +15,7 @@ define <vscale x 1 x i8> @vdiv_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -70,7 +70,7 @@ define <vscale x 2 x i8> @vdiv_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8
 ; CHECK-LABEL: vdiv_vv_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i8> %va, %vb
   ret <vscale x 2 x i8> %vc
@@ -80,7 +80,7 @@ define <vscale x 2 x i8> @vdiv_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -111,7 +111,7 @@ define <vscale x 4 x i8> @vdiv_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8
 ; CHECK-LABEL: vdiv_vv_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i8> %va, %vb
   ret <vscale x 4 x i8> %vc
@@ -121,7 +121,7 @@ define <vscale x 4 x i8> @vdiv_vx_nxv4i8(<vscale x 4 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -152,7 +152,7 @@ define <vscale x 8 x i8> @vdiv_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8
 ; CHECK-LABEL: vdiv_vv_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i8> %va, %vb
   ret <vscale x 8 x i8> %vc
@@ -162,7 +162,7 @@ define <vscale x 8 x i8> @vdiv_vx_nxv8i8(<vscale x 8 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -193,7 +193,7 @@ define <vscale x 16 x i8> @vdiv_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-LABEL: vdiv_vv_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 16 x i8> %va, %vb
   ret <vscale x 16 x i8> %vc
@@ -203,7 +203,7 @@ define <vscale x 16 x i8> @vdiv_vx_nxv16i8(<vscale x 16 x i8> %va, i8 signext %b
 ; CHECK-LABEL: vdiv_vx_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -234,7 +234,7 @@ define <vscale x 32 x i8> @vdiv_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32
 ; CHECK-LABEL: vdiv_vv_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 32 x i8> %va, %vb
   ret <vscale x 32 x i8> %vc
@@ -244,7 +244,7 @@ define <vscale x 32 x i8> @vdiv_vx_nxv32i8(<vscale x 32 x i8> %va, i8 signext %b
 ; CHECK-LABEL: vdiv_vx_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -275,7 +275,7 @@ define <vscale x 64 x i8> @vdiv_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64
 ; CHECK-LABEL: vdiv_vv_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 64 x i8> %va, %vb
   ret <vscale x 64 x i8> %vc
@@ -285,7 +285,7 @@ define <vscale x 64 x i8> @vdiv_vx_nxv64i8(<vscale x 64 x i8> %va, i8 signext %b
 ; CHECK-LABEL: vdiv_vx_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -316,7 +316,7 @@ define <vscale x 1 x i16> @vdiv_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
 ; CHECK-LABEL: vdiv_vv_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i16> %va, %vb
   ret <vscale x 1 x i16> %vc
@@ -326,7 +326,7 @@ define <vscale x 1 x i16> @vdiv_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -357,7 +357,7 @@ define <vscale x 2 x i16> @vdiv_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
 ; CHECK-LABEL: vdiv_vv_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i16> %va, %vb
   ret <vscale x 2 x i16> %vc
@@ -367,7 +367,7 @@ define <vscale x 2 x i16> @vdiv_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -398,7 +398,7 @@ define <vscale x 4 x i16> @vdiv_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
 ; CHECK-LABEL: vdiv_vv_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i16> %va, %vb
   ret <vscale x 4 x i16> %vc
@@ -408,7 +408,7 @@ define <vscale x 4 x i16> @vdiv_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -439,7 +439,7 @@ define <vscale x 8 x i16> @vdiv_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
 ; CHECK-LABEL: vdiv_vv_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i16> %va, %vb
   ret <vscale x 8 x i16> %vc
@@ -449,7 +449,7 @@ define <vscale x 8 x i16> @vdiv_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -480,7 +480,7 @@ define <vscale x 16 x i16> @vdiv_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; CHECK-LABEL: vdiv_vv_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 16 x i16> %va, %vb
   ret <vscale x 16 x i16> %vc
@@ -490,7 +490,7 @@ define <vscale x 16 x i16> @vdiv_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signex
 ; CHECK-LABEL: vdiv_vx_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -521,7 +521,7 @@ define <vscale x 32 x i16> @vdiv_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; CHECK-LABEL: vdiv_vv_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 32 x i16> %va, %vb
   ret <vscale x 32 x i16> %vc
@@ -531,7 +531,7 @@ define <vscale x 32 x i16> @vdiv_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signex
 ; CHECK-LABEL: vdiv_vx_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -562,7 +562,7 @@ define <vscale x 1 x i32> @vdiv_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x
 ; CHECK-LABEL: vdiv_vv_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i32> %va, %vb
   ret <vscale x 1 x i32> %vc
@@ -572,7 +572,7 @@ define <vscale x 1 x i32> @vdiv_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -603,7 +603,7 @@ define <vscale x 2 x i32> @vdiv_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
 ; CHECK-LABEL: vdiv_vv_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i32> %va, %vb
   ret <vscale x 2 x i32> %vc
@@ -613,7 +613,7 @@ define <vscale x 2 x i32> @vdiv_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -644,7 +644,7 @@ define <vscale x 4 x i32> @vdiv_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
 ; CHECK-LABEL: vdiv_vv_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i32> %va, %vb
   ret <vscale x 4 x i32> %vc
@@ -654,7 +654,7 @@ define <vscale x 4 x i32> @vdiv_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -685,7 +685,7 @@ define <vscale x 8 x i32> @vdiv_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
 ; CHECK-LABEL: vdiv_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i32> %va, %vb
   ret <vscale x 8 x i32> %vc
@@ -695,7 +695,7 @@ define <vscale x 8 x i32> @vdiv_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -726,7 +726,7 @@ define <vscale x 16 x i32> @vdiv_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; CHECK-LABEL: vdiv_vv_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 16 x i32> %va, %vb
   ret <vscale x 16 x i32> %vc
@@ -736,7 +736,7 @@ define <vscale x 16 x i32> @vdiv_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -767,7 +767,7 @@ define <vscale x 1 x i64> @vdiv_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
 ; CHECK-LABEL: vdiv_vv_nxv1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i64> %va, %vb
   ret <vscale x 1 x i64> %vc
@@ -784,7 +784,7 @@ define <vscale x 1 x i64> @vdiv_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v26, v26, a1
 ; CHECK-NEXT:    vsrl.vx v26, v26, a1
 ; CHECK-NEXT:    vor.vv v25, v26, v25
-; CHECK-NEXT:    vdivu.vv v8, v8, v25
+; CHECK-NEXT:    vdiv.vv v8, v8, v25
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
@@ -825,7 +825,7 @@ define <vscale x 2 x i64> @vdiv_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
 ; CHECK-LABEL: vdiv_vv_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i64> %va, %vb
   ret <vscale x 2 x i64> %vc
@@ -842,7 +842,7 @@ define <vscale x 2 x i64> @vdiv_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v28, v28, a1
 ; CHECK-NEXT:    vsrl.vx v28, v28, a1
 ; CHECK-NEXT:    vor.vv v26, v28, v26
-; CHECK-NEXT:    vdivu.vv v8, v8, v26
+; CHECK-NEXT:    vdiv.vv v8, v8, v26
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -883,7 +883,7 @@ define <vscale x 4 x i64> @vdiv_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
 ; CHECK-LABEL: vdiv_vv_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i64> %va, %vb
   ret <vscale x 4 x i64> %vc
@@ -900,7 +900,7 @@ define <vscale x 4 x i64> @vdiv_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v12, v12, a1
 ; CHECK-NEXT:    vsrl.vx v12, v12, a1
 ; CHECK-NEXT:    vor.vv v28, v12, v28
-; CHECK-NEXT:    vdivu.vv v8, v8, v28
+; CHECK-NEXT:    vdiv.vv v8, v8, v28
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -941,7 +941,7 @@ define <vscale x 8 x i64> @vdiv_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x
 ; CHECK-LABEL: vdiv_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i64> %va, %vb
   ret <vscale x 8 x i64> %vc
@@ -958,7 +958,7 @@ define <vscale x 8 x i64> @vdiv_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v24, v24, a1
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v16, v24, v16
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll
index 991cccf72cdd..b8f331e78b5b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode-rv64.ll
@@ -5,7 +5,7 @@ define <vscale x 1 x i8> @vdiv_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8
 ; CHECK-LABEL: vdiv_vv_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i8> %va, %vb
   ret <vscale x 1 x i8> %vc
@@ -15,7 +15,7 @@ define <vscale x 1 x i8> @vdiv_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -46,7 +46,7 @@ define <vscale x 2 x i8> @vdiv_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8
 ; CHECK-LABEL: vdiv_vv_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i8> %va, %vb
   ret <vscale x 2 x i8> %vc
@@ -56,7 +56,7 @@ define <vscale x 2 x i8> @vdiv_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -87,7 +87,7 @@ define <vscale x 4 x i8> @vdiv_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8
 ; CHECK-LABEL: vdiv_vv_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i8> %va, %vb
   ret <vscale x 4 x i8> %vc
@@ -97,7 +97,7 @@ define <vscale x 4 x i8> @vdiv_vx_nxv4i8(<vscale x 4 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -128,7 +128,7 @@ define <vscale x 8 x i8> @vdiv_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8
 ; CHECK-LABEL: vdiv_vv_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i8> %va, %vb
   ret <vscale x 8 x i8> %vc
@@ -138,7 +138,7 @@ define <vscale x 8 x i8> @vdiv_vx_nxv8i8(<vscale x 8 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vdiv_vx_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -169,7 +169,7 @@ define <vscale x 16 x i8> @vdiv_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-LABEL: vdiv_vv_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 16 x i8> %va, %vb
   ret <vscale x 16 x i8> %vc
@@ -179,7 +179,7 @@ define <vscale x 16 x i8> @vdiv_vx_nxv16i8(<vscale x 16 x i8> %va, i8 signext %b
 ; CHECK-LABEL: vdiv_vx_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -210,7 +210,7 @@ define <vscale x 32 x i8> @vdiv_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32
 ; CHECK-LABEL: vdiv_vv_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 32 x i8> %va, %vb
   ret <vscale x 32 x i8> %vc
@@ -220,7 +220,7 @@ define <vscale x 32 x i8> @vdiv_vx_nxv32i8(<vscale x 32 x i8> %va, i8 signext %b
 ; CHECK-LABEL: vdiv_vx_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -251,7 +251,7 @@ define <vscale x 64 x i8> @vdiv_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64
 ; CHECK-LABEL: vdiv_vv_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 64 x i8> %va, %vb
   ret <vscale x 64 x i8> %vc
@@ -261,7 +261,7 @@ define <vscale x 64 x i8> @vdiv_vx_nxv64i8(<vscale x 64 x i8> %va, i8 signext %b
 ; CHECK-LABEL: vdiv_vx_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -292,7 +292,7 @@ define <vscale x 1 x i16> @vdiv_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x
 ; CHECK-LABEL: vdiv_vv_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i16> %va, %vb
   ret <vscale x 1 x i16> %vc
@@ -302,7 +302,7 @@ define <vscale x 1 x i16> @vdiv_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -333,7 +333,7 @@ define <vscale x 2 x i16> @vdiv_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x
 ; CHECK-LABEL: vdiv_vv_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i16> %va, %vb
   ret <vscale x 2 x i16> %vc
@@ -343,7 +343,7 @@ define <vscale x 2 x i16> @vdiv_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -374,7 +374,7 @@ define <vscale x 4 x i16> @vdiv_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x
 ; CHECK-LABEL: vdiv_vv_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i16> %va, %vb
   ret <vscale x 4 x i16> %vc
@@ -384,7 +384,7 @@ define <vscale x 4 x i16> @vdiv_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -415,7 +415,7 @@ define <vscale x 8 x i16> @vdiv_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x
 ; CHECK-LABEL: vdiv_vv_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i16> %va, %vb
   ret <vscale x 8 x i16> %vc
@@ -425,7 +425,7 @@ define <vscale x 8 x i16> @vdiv_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %
 ; CHECK-LABEL: vdiv_vx_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -456,7 +456,7 @@ define <vscale x 16 x i16> @vdiv_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; CHECK-LABEL: vdiv_vv_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 16 x i16> %va, %vb
   ret <vscale x 16 x i16> %vc
@@ -466,7 +466,7 @@ define <vscale x 16 x i16> @vdiv_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signex
 ; CHECK-LABEL: vdiv_vx_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -497,7 +497,7 @@ define <vscale x 32 x i16> @vdiv_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; CHECK-LABEL: vdiv_vv_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 32 x i16> %va, %vb
   ret <vscale x 32 x i16> %vc
@@ -507,7 +507,7 @@ define <vscale x 32 x i16> @vdiv_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signex
 ; CHECK-LABEL: vdiv_vx_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -538,7 +538,7 @@ define <vscale x 1 x i32> @vdiv_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x
 ; CHECK-LABEL: vdiv_vv_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i32> %va, %vb
   ret <vscale x 1 x i32> %vc
@@ -548,7 +548,7 @@ define <vscale x 1 x i32> @vdiv_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext %
 ; CHECK-LABEL: vdiv_vx_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -580,7 +580,7 @@ define <vscale x 2 x i32> @vdiv_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x
 ; CHECK-LABEL: vdiv_vv_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i32> %va, %vb
   ret <vscale x 2 x i32> %vc
@@ -590,7 +590,7 @@ define <vscale x 2 x i32> @vdiv_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext %
 ; CHECK-LABEL: vdiv_vx_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -622,7 +622,7 @@ define <vscale x 4 x i32> @vdiv_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x
 ; CHECK-LABEL: vdiv_vv_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i32> %va, %vb
   ret <vscale x 4 x i32> %vc
@@ -632,7 +632,7 @@ define <vscale x 4 x i32> @vdiv_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext %
 ; CHECK-LABEL: vdiv_vx_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -664,7 +664,7 @@ define <vscale x 8 x i32> @vdiv_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x
 ; CHECK-LABEL: vdiv_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i32> %va, %vb
   ret <vscale x 8 x i32> %vc
@@ -674,7 +674,7 @@ define <vscale x 8 x i32> @vdiv_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext %
 ; CHECK-LABEL: vdiv_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -706,7 +706,7 @@ define <vscale x 16 x i32> @vdiv_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; CHECK-LABEL: vdiv_vv_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 16 x i32> %va, %vb
   ret <vscale x 16 x i32> %vc
@@ -716,7 +716,7 @@ define <vscale x 16 x i32> @vdiv_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signex
 ; CHECK-LABEL: vdiv_vx_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -748,7 +748,7 @@ define <vscale x 1 x i64> @vdiv_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
 ; CHECK-LABEL: vdiv_vv_nxv1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v9
+; CHECK-NEXT:    vdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 1 x i64> %va, %vb
   ret <vscale x 1 x i64> %vc
@@ -758,7 +758,7 @@ define <vscale x 1 x i64> @vdiv_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
@@ -796,7 +796,7 @@ define <vscale x 2 x i64> @vdiv_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
 ; CHECK-LABEL: vdiv_vv_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
+; CHECK-NEXT:    vdiv.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 2 x i64> %va, %vb
   ret <vscale x 2 x i64> %vc
@@ -806,7 +806,7 @@ define <vscale x 2 x i64> @vdiv_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -844,7 +844,7 @@ define <vscale x 4 x i64> @vdiv_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
 ; CHECK-LABEL: vdiv_vv_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v12
+; CHECK-NEXT:    vdiv.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 4 x i64> %va, %vb
   ret <vscale x 4 x i64> %vc
@@ -854,7 +854,7 @@ define <vscale x 4 x i64> @vdiv_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -892,7 +892,7 @@ define <vscale x 8 x i64> @vdiv_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x
 ; CHECK-LABEL: vdiv_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vdivu.vv v8, v8, v16
+; CHECK-NEXT:    vdiv.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = sdiv <vscale x 8 x i64> %va, %vb
   ret <vscale x 8 x i64> %vc
@@ -902,7 +902,7 @@ define <vscale x 8 x i64> @vdiv_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdiv_vx_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vdivu.vx v8, v8, a0
+; CHECK-NEXT:    vdiv.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll
index 27b27cd64bae..383d3f380fe8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll
@@ -5,7 +5,7 @@ define <vscale x 1 x i8> @vdivu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i
 ; CHECK-LABEL: vdivu_vv_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i8> %va, %vb
   ret <vscale x 1 x i8> %vc
@@ -15,7 +15,7 @@ define <vscale x 1 x i8> @vdivu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -68,7 +68,7 @@ define <vscale x 2 x i8> @vdivu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i
 ; CHECK-LABEL: vdivu_vv_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i8> %va, %vb
   ret <vscale x 2 x i8> %vc
@@ -78,7 +78,7 @@ define <vscale x 2 x i8> @vdivu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -107,7 +107,7 @@ define <vscale x 4 x i8> @vdivu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i
 ; CHECK-LABEL: vdivu_vv_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i8> %va, %vb
   ret <vscale x 4 x i8> %vc
@@ -117,7 +117,7 @@ define <vscale x 4 x i8> @vdivu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -146,7 +146,7 @@ define <vscale x 8 x i8> @vdivu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i
 ; CHECK-LABEL: vdivu_vv_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i8> %va, %vb
   ret <vscale x 8 x i8> %vc
@@ -156,7 +156,7 @@ define <vscale x 8 x i8> @vdivu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -185,7 +185,7 @@ define <vscale x 16 x i8> @vdivu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-LABEL: vdivu_vv_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 16 x i8> %va, %vb
   ret <vscale x 16 x i8> %vc
@@ -195,7 +195,7 @@ define <vscale x 16 x i8> @vdivu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 signext %
 ; CHECK-LABEL: vdivu_vx_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -224,7 +224,7 @@ define <vscale x 32 x i8> @vdivu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32
 ; CHECK-LABEL: vdivu_vv_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 32 x i8> %va, %vb
   ret <vscale x 32 x i8> %vc
@@ -234,7 +234,7 @@ define <vscale x 32 x i8> @vdivu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 signext %
 ; CHECK-LABEL: vdivu_vx_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -263,7 +263,7 @@ define <vscale x 64 x i8> @vdivu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64
 ; CHECK-LABEL: vdivu_vv_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 64 x i8> %va, %vb
   ret <vscale x 64 x i8> %vc
@@ -273,7 +273,7 @@ define <vscale x 64 x i8> @vdivu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 signext %
 ; CHECK-LABEL: vdivu_vx_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -302,7 +302,7 @@ define <vscale x 1 x i16> @vdivu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1
 ; CHECK-LABEL: vdivu_vv_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i16> %va, %vb
   ret <vscale x 1 x i16> %vc
@@ -312,7 +312,7 @@ define <vscale x 1 x i16> @vdivu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -342,7 +342,7 @@ define <vscale x 2 x i16> @vdivu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2
 ; CHECK-LABEL: vdivu_vv_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i16> %va, %vb
   ret <vscale x 2 x i16> %vc
@@ -352,7 +352,7 @@ define <vscale x 2 x i16> @vdivu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -382,7 +382,7 @@ define <vscale x 4 x i16> @vdivu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4
 ; CHECK-LABEL: vdivu_vv_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i16> %va, %vb
   ret <vscale x 4 x i16> %vc
@@ -392,7 +392,7 @@ define <vscale x 4 x i16> @vdivu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -422,7 +422,7 @@ define <vscale x 8 x i16> @vdivu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8
 ; CHECK-LABEL: vdivu_vv_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i16> %va, %vb
   ret <vscale x 8 x i16> %vc
@@ -432,7 +432,7 @@ define <vscale x 8 x i16> @vdivu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -462,7 +462,7 @@ define <vscale x 16 x i16> @vdivu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; CHECK-LABEL: vdivu_vv_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 16 x i16> %va, %vb
   ret <vscale x 16 x i16> %vc
@@ -472,7 +472,7 @@ define <vscale x 16 x i16> @vdivu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signe
 ; CHECK-LABEL: vdivu_vx_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -502,7 +502,7 @@ define <vscale x 32 x i16> @vdivu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; CHECK-LABEL: vdivu_vv_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 32 x i16> %va, %vb
   ret <vscale x 32 x i16> %vc
@@ -512,7 +512,7 @@ define <vscale x 32 x i16> @vdivu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signe
 ; CHECK-LABEL: vdivu_vx_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -542,7 +542,7 @@ define <vscale x 1 x i32> @vdivu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
 ; CHECK-LABEL: vdivu_vv_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i32> %va, %vb
   ret <vscale x 1 x i32> %vc
@@ -552,7 +552,7 @@ define <vscale x 1 x i32> @vdivu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -582,7 +582,7 @@ define <vscale x 2 x i32> @vdivu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
 ; CHECK-LABEL: vdivu_vv_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i32> %va, %vb
   ret <vscale x 2 x i32> %vc
@@ -592,7 +592,7 @@ define <vscale x 2 x i32> @vdivu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -622,7 +622,7 @@ define <vscale x 4 x i32> @vdivu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
 ; CHECK-LABEL: vdivu_vv_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i32> %va, %vb
   ret <vscale x 4 x i32> %vc
@@ -632,7 +632,7 @@ define <vscale x 4 x i32> @vdivu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -662,7 +662,7 @@ define <vscale x 8 x i32> @vdivu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: vdivu_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i32> %va, %vb
   ret <vscale x 8 x i32> %vc
@@ -672,7 +672,7 @@ define <vscale x 8 x i32> @vdivu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -702,7 +702,7 @@ define <vscale x 16 x i32> @vdivu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; CHECK-LABEL: vdivu_vv_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 16 x i32> %va, %vb
   ret <vscale x 16 x i32> %vc
@@ -712,7 +712,7 @@ define <vscale x 16 x i32> @vdivu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -742,7 +742,7 @@ define <vscale x 1 x i64> @vdivu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
 ; CHECK-LABEL: vdivu_vv_nxv1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i64> %va, %vb
   ret <vscale x 1 x i64> %vc
@@ -759,7 +759,7 @@ define <vscale x 1 x i64> @vdivu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v26, v26, a1
 ; CHECK-NEXT:    vsrl.vx v26, v26, a1
 ; CHECK-NEXT:    vor.vv v25, v26, v25
-; CHECK-NEXT:    vdiv.vv v8, v8, v25
+; CHECK-NEXT:    vdivu.vv v8, v8, v25
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
@@ -796,7 +796,7 @@ define <vscale x 2 x i64> @vdivu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
 ; CHECK-LABEL: vdivu_vv_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i64> %va, %vb
   ret <vscale x 2 x i64> %vc
@@ -813,7 +813,7 @@ define <vscale x 2 x i64> @vdivu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v28, v28, a1
 ; CHECK-NEXT:    vsrl.vx v28, v28, a1
 ; CHECK-NEXT:    vor.vv v26, v28, v26
-; CHECK-NEXT:    vdiv.vv v8, v8, v26
+; CHECK-NEXT:    vdivu.vv v8, v8, v26
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -850,7 +850,7 @@ define <vscale x 4 x i64> @vdivu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
 ; CHECK-LABEL: vdivu_vv_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i64> %va, %vb
   ret <vscale x 4 x i64> %vc
@@ -867,7 +867,7 @@ define <vscale x 4 x i64> @vdivu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v12, v12, a1
 ; CHECK-NEXT:    vsrl.vx v12, v12, a1
 ; CHECK-NEXT:    vor.vv v28, v12, v28
-; CHECK-NEXT:    vdiv.vv v8, v8, v28
+; CHECK-NEXT:    vdivu.vv v8, v8, v28
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -904,7 +904,7 @@ define <vscale x 8 x i64> @vdivu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: vdivu_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i64> %va, %vb
   ret <vscale x 8 x i64> %vc
@@ -921,7 +921,7 @@ define <vscale x 8 x i64> @vdivu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 ; CHECK-NEXT:    vsll.vx v24, v24, a1
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v16, v24, v16
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll
index 70cd4fba1eb7..bc72099d75eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll
@@ -5,7 +5,7 @@ define <vscale x 1 x i8> @vdivu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i
 ; CHECK-LABEL: vdivu_vv_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i8> %va, %vb
   ret <vscale x 1 x i8> %vc
@@ -15,7 +15,7 @@ define <vscale x 1 x i8> @vdivu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -44,7 +44,7 @@ define <vscale x 2 x i8> @vdivu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i
 ; CHECK-LABEL: vdivu_vv_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i8> %va, %vb
   ret <vscale x 2 x i8> %vc
@@ -54,7 +54,7 @@ define <vscale x 2 x i8> @vdivu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -83,7 +83,7 @@ define <vscale x 4 x i8> @vdivu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i
 ; CHECK-LABEL: vdivu_vv_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i8> %va, %vb
   ret <vscale x 4 x i8> %vc
@@ -93,7 +93,7 @@ define <vscale x 4 x i8> @vdivu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -122,7 +122,7 @@ define <vscale x 8 x i8> @vdivu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i
 ; CHECK-LABEL: vdivu_vv_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i8> %va, %vb
   ret <vscale x 8 x i8> %vc
@@ -132,7 +132,7 @@ define <vscale x 8 x i8> @vdivu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 signext %b)
 ; CHECK-LABEL: vdivu_vx_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -161,7 +161,7 @@ define <vscale x 16 x i8> @vdivu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16
 ; CHECK-LABEL: vdivu_vv_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 16 x i8> %va, %vb
   ret <vscale x 16 x i8> %vc
@@ -171,7 +171,7 @@ define <vscale x 16 x i8> @vdivu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 signext %
 ; CHECK-LABEL: vdivu_vx_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -200,7 +200,7 @@ define <vscale x 32 x i8> @vdivu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32
 ; CHECK-LABEL: vdivu_vv_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 32 x i8> %va, %vb
   ret <vscale x 32 x i8> %vc
@@ -210,7 +210,7 @@ define <vscale x 32 x i8> @vdivu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 signext %
 ; CHECK-LABEL: vdivu_vx_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -239,7 +239,7 @@ define <vscale x 64 x i8> @vdivu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64
 ; CHECK-LABEL: vdivu_vv_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 64 x i8> %va, %vb
   ret <vscale x 64 x i8> %vc
@@ -249,7 +249,7 @@ define <vscale x 64 x i8> @vdivu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 signext %
 ; CHECK-LABEL: vdivu_vx_nxv64i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
   %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -278,7 +278,7 @@ define <vscale x 1 x i16> @vdivu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1
 ; CHECK-LABEL: vdivu_vv_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i16> %va, %vb
   ret <vscale x 1 x i16> %vc
@@ -288,7 +288,7 @@ define <vscale x 1 x i16> @vdivu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -318,7 +318,7 @@ define <vscale x 2 x i16> @vdivu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2
 ; CHECK-LABEL: vdivu_vv_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i16> %va, %vb
   ret <vscale x 2 x i16> %vc
@@ -328,7 +328,7 @@ define <vscale x 2 x i16> @vdivu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -358,7 +358,7 @@ define <vscale x 4 x i16> @vdivu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4
 ; CHECK-LABEL: vdivu_vv_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i16> %va, %vb
   ret <vscale x 4 x i16> %vc
@@ -368,7 +368,7 @@ define <vscale x 4 x i16> @vdivu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -398,7 +398,7 @@ define <vscale x 8 x i16> @vdivu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8
 ; CHECK-LABEL: vdivu_vv_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i16> %va, %vb
   ret <vscale x 8 x i16> %vc
@@ -408,7 +408,7 @@ define <vscale x 8 x i16> @vdivu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext
 ; CHECK-LABEL: vdivu_vx_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -438,7 +438,7 @@ define <vscale x 16 x i16> @vdivu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x
 ; CHECK-LABEL: vdivu_vv_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 16 x i16> %va, %vb
   ret <vscale x 16 x i16> %vc
@@ -448,7 +448,7 @@ define <vscale x 16 x i16> @vdivu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signe
 ; CHECK-LABEL: vdivu_vx_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -478,7 +478,7 @@ define <vscale x 32 x i16> @vdivu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x
 ; CHECK-LABEL: vdivu_vv_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 32 x i16> %va, %vb
   ret <vscale x 32 x i16> %vc
@@ -488,7 +488,7 @@ define <vscale x 32 x i16> @vdivu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signe
 ; CHECK-LABEL: vdivu_vx_nxv32i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -518,7 +518,7 @@ define <vscale x 1 x i32> @vdivu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1
 ; CHECK-LABEL: vdivu_vv_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i32> %va, %vb
   ret <vscale x 1 x i32> %vc
@@ -528,7 +528,7 @@ define <vscale x 1 x i32> @vdivu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext
 ; CHECK-LABEL: vdivu_vx_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -558,7 +558,7 @@ define <vscale x 2 x i32> @vdivu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2
 ; CHECK-LABEL: vdivu_vv_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i32> %va, %vb
   ret <vscale x 2 x i32> %vc
@@ -568,7 +568,7 @@ define <vscale x 2 x i32> @vdivu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext
 ; CHECK-LABEL: vdivu_vx_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -598,7 +598,7 @@ define <vscale x 4 x i32> @vdivu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4
 ; CHECK-LABEL: vdivu_vv_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i32> %va, %vb
   ret <vscale x 4 x i32> %vc
@@ -608,7 +608,7 @@ define <vscale x 4 x i32> @vdivu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext
 ; CHECK-LABEL: vdivu_vx_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -638,7 +638,7 @@ define <vscale x 8 x i32> @vdivu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8
 ; CHECK-LABEL: vdivu_vv_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i32> %va, %vb
   ret <vscale x 8 x i32> %vc
@@ -648,7 +648,7 @@ define <vscale x 8 x i32> @vdivu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext
 ; CHECK-LABEL: vdivu_vx_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -678,7 +678,7 @@ define <vscale x 16 x i32> @vdivu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x
 ; CHECK-LABEL: vdivu_vv_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 16 x i32> %va, %vb
   ret <vscale x 16 x i32> %vc
@@ -688,7 +688,7 @@ define <vscale x 16 x i32> @vdivu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signe
 ; CHECK-LABEL: vdivu_vx_nxv16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -718,7 +718,7 @@ define <vscale x 1 x i64> @vdivu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
 ; CHECK-LABEL: vdivu_vv_nxv1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v9
+; CHECK-NEXT:    vdivu.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 1 x i64> %va, %vb
   ret <vscale x 1 x i64> %vc
@@ -728,7 +728,7 @@ define <vscale x 1 x i64> @vdivu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
@@ -760,7 +760,7 @@ define <vscale x 2 x i64> @vdivu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
 ; CHECK-LABEL: vdivu_vv_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v10
+; CHECK-NEXT:    vdivu.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 2 x i64> %va, %vb
   ret <vscale x 2 x i64> %vc
@@ -770,7 +770,7 @@ define <vscale x 2 x i64> @vdivu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -802,7 +802,7 @@ define <vscale x 4 x i64> @vdivu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
 ; CHECK-LABEL: vdivu_vv_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v12
+; CHECK-NEXT:    vdivu.vv v8, v8, v12
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 4 x i64> %va, %vb
   ret <vscale x 4 x i64> %vc
@@ -812,7 +812,7 @@ define <vscale x 4 x i64> @vdivu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -844,7 +844,7 @@ define <vscale x 8 x i64> @vdivu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 ; CHECK-LABEL: vdivu_vv_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vdiv.vv v8, v8, v16
+; CHECK-NEXT:    vdivu.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %vc = udiv <vscale x 8 x i64> %va, %vb
   ret <vscale x 8 x i64> %vc
@@ -854,7 +854,7 @@ define <vscale x 8 x i64> @vdivu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 ; CHECK-LABEL: vdivu_vx_nxv8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vdiv.vx v8, v8, a0
+; CHECK-NEXT:    vdivu.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer

From 2cf21fd6a5b4a6f0f0da55717a787fc38202cca8 Mon Sep 17 00:00:00 2001
From: Joachim Meyer <joachim@joameyer.de>
Date: Thu, 17 Dec 2020 23:58:13 +0100
Subject: [PATCH 083/244] [Support] Indent multi-line descr of enum cli
 options.

As noted in https://reviews.llvm.org/D93459, the formatting of
multi-line descriptions of clEnumValN and the like is unfavorable.
Thus this patch adds support for correctly indenting these.

Reviewed By: serge-sans-paille

Differential Revision: https://reviews.llvm.org/D93494

(cherry picked from commit e3f02302e318837d2421c6425450f04ae0a82b90)
---
 llvm/include/llvm/Support/CommandLine.h    | 13 +++++++++++
 llvm/lib/Support/CommandLine.cpp           | 25 ++++++++++++++++------
 llvm/unittests/Support/CommandLineTest.cpp | 22 +++++++++++++++++++
 3 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 38f3e188be55..0706aa226c0e 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -369,9 +369,22 @@ class Option {
 
   virtual void setDefault() = 0;
 
+  // Prints the help string for an option.
+  //
+  // This maintains the Indent for multi-line descriptions.
+  // FirstLineIndentedBy is the count of chars of the first line
+  //      i.e. the one containing the --<option name>.
   static void printHelpStr(StringRef HelpStr, size_t Indent,
                            size_t FirstLineIndentedBy);
 
+  // Prints the help string for an enum value.
+  //
+  // This maintains the Indent for multi-line descriptions.
+  // FirstLineIndentedBy is the count of chars of the first line
+  //      i.e. the one containing the =<value>.
+  static void printEnumValHelpStr(StringRef HelpStr, size_t Indent,
+                                  size_t FirstLineIndentedBy);
+
   virtual void getExtraOptionNames(SmallVectorImpl<StringRef> &) {}
 
   // addOccurrence - Wrapper around handleOccurrence that enforces Flags.
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 6d89481bf28a..e2f014d1815b 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1726,6 +1726,19 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
   }
 }
 
+void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
+                                 size_t FirstLineIndentedBy) {
+  const StringRef ValHelpPrefix = "  "; // padding before description text
+  assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size());
+  std::pair<StringRef, StringRef> Split = HelpStr.split('\n'); // head / rest
+  outs().indent(BaseIndent - FirstLineIndentedBy) // pad out to BaseIndent
+      << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";
+  while (!Split.second.empty()) { // one pass per remaining description line
+    Split = Split.second.split('\n');
+    outs().indent(BaseIndent + ValHelpPrefix.size()) << Split.first << "\n";
+  }
+}
+
 // Print out the option for the alias.
 void alias::printOptionInfo(size_t GlobalWidth) const {
   outs() << PrintArg(ArgStr);
@@ -1971,17 +1984,17 @@ void generic_parser_base::printOptionInfo(const Option &O,
       StringRef Description = getDescription(i);
       if (!shouldPrintOption(OptionName, Description, O))
         continue;
-      assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize);
-      size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize;
+      size_t FirstLineIndent = OptionName.size() + OptionPrefixesSize;
       outs() << OptionPrefix << OptionName;
       if (OptionName.empty()) {
         outs() << EmptyOption;
-        assert(NumSpaces >= EmptyOption.size());
-        NumSpaces -= EmptyOption.size();
+        assert(FirstLineIndent >= EmptyOption.size());
+        FirstLineIndent += EmptyOption.size();
       }
       if (!Description.empty())
-        outs().indent(NumSpaces) << ArgHelpPrefix << "  " << Description;
-      outs() << '\n';
+        Option::printEnumValHelpStr(Description, GlobalWidth, FirstLineIndent);
+      else
+        outs() << '\n';
     }
   } else {
     if (!O.HelpStr.empty())
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index a05f3894ef05..4accdc5ea1fb 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -1263,6 +1263,28 @@ TEST_F(PrintOptionInfoTest, PrintOptionInfoEmptyValueDescription) {
   // clang-format on
 }
 
+TEST_F(PrintOptionInfoTest, PrintOptionInfoMultilineValueDescription) {
+  std::string Output = // help output for two multi-line enum descriptions
+      runTest(cl::ValueRequired,
+              cl::values(clEnumValN(OptionValue::Val, "v1",
+                                    "This is the first enum value\n"
+                                    "which has a really long description\n"
+                                    "thus it is multi-line."),
+                         clEnumValN(OptionValue::Val, "",
+                                    "This is an unnamed enum value option\n"
+                                    "Should be indented as well")));
+
+  // clang-format off
+  EXPECT_EQ(Output, // continuation lines line up under the first line's text
+            ("  --" + Opt + "=<value> - " + HelpText + "\n"
+             "    =v1                 -   This is the first enum value\n"
+             "                            which has a really long description\n"
+             "                            thus it is multi-line.\n"
+             "    =<empty>            -   This is an unnamed enum value option\n"
+             "                            Should be indented as well\n").str());
+  // clang-format on
+}
+
 class GetOptionWidthTest : public ::testing::Test {
 public:
   enum class OptionValue { Val };

From a67a4346f78d856c6224578392b128ab1d55009e Mon Sep 17 00:00:00 2001
From: Ayke van Laethem <aykevanlaethem@gmail.com>
Date: Tue, 2 Feb 2021 20:58:31 +0100
Subject: [PATCH 084/244] [ARM] Do not emit ldrexd/strexd on Cortex-M chips

The ldrexd/strexd instructions are not supported on M-class chips, see
for example
https://developer.arm.com/documentation/dui0489/e/arm-and-thumb-instructions/memory-access-instructions/ldrex-and-strex
which says:

> All these 32-bit Thumb instructions are available in ARMv6T2 and
> above, except that LDREXD and STREXD are not available in the ARMv7-M
> architecture.

Looking at the ARMv8-M architecture, it appears that these instructions
aren't supported either. The Architecture Reference Manual lists
ldrex/strex but not ldrexd/strexd:
https://developer.arm.com/documentation/ddi0553/bn/

Godbolt example on LLVM 11.0.0, which incorrectly emits ldrexd/strexd
instructions: https://llvm.godbolt.org/z/5qqPnE

Differential Revision: https://reviews.llvm.org/D95891

(cherry picked from commit aecdf15cc7f866180dc769265b8183cad34bb33a)
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  6 ++++-
 llvm/test/CodeGen/ARM/atomic-64bit.ll   | 35 +++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 397979b4ab1e..598062672a56 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18661,6 +18661,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
              : AtomicExpansionKind::None;
 }
 
+// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
+// bits, and up to 64 bits on the non-M profiles.
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -18668,9 +18670,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+  unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
   bool HasAtomicCmpXchg =
       !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
+      Size <= (Subtarget->isMClass() ? 32U : 64U))
     return AtomicExpansionKind::LLSC;
   return AtomicExpansionKind::None;
 }
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index 8841483c97a4..eadefcd23bc6 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -2,6 +2,8 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
 ; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 ; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE
+; RUN: llc < %s -mtriple=armv7m--none-eabi | FileCheck %s --check-prefix=CHECK-M
+; RUN: llc < %s -mtriple=armv8m--none-eabi | FileCheck %s --check-prefix=CHECK-M
 
 define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test1:
@@ -28,6 +30,8 @@ define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_add_8
+
   %r = atomicrmw add i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -57,6 +61,8 @@ define i64 @test2(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_sub_8
+
   %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -86,6 +92,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_and_8
+
   %r = atomicrmw and i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -115,6 +123,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_or_8
+
   %r = atomicrmw or i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -144,6 +154,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_xor_8
+
   %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -165,6 +177,8 @@ define i64 @test6(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_lock_test_and_set_8
+
   %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -199,12 +213,15 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-THUMB: beq
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_val_compare_and_swap_8
+
   %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
   %r = extractvalue { i64, i1 } %pair, 0
   ret i64 %r
 }
 
-; Compiles down to a single ldrexd
+; Compiles down to a single ldrexd, except on M class devices where ldrexd
+; isn't supported.
 define i64 @test8(i64* %ptr) {
 ; CHECK-LABEL: test8:
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
@@ -220,12 +237,15 @@ define i64 @test8(i64* %ptr) {
 ; CHECK-THUMB-NOT: strexd
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_val_compare_and_swap_8
+
   %r = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %r
 }
 
 ; Compiles down to atomicrmw xchg; there really isn't any more efficient
-; way to write it.
+; way to write it, except on M class devices, where ldrexd/strexd aren't
+; supported.
 define void @test9(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test9:
 ; CHECK: dmb {{ish$}}
@@ -243,6 +263,8 @@ define void @test9(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_lock_test_and_set_8
+
   store atomic i64 %val, i64* %ptr seq_cst, align 8
   ret void
 }
@@ -286,6 +308,8 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_min_8
+
   %r = atomicrmw min i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -329,6 +353,8 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_umin_8
+
   %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -372,6 +398,8 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_max_8
+
   %r = atomicrmw max i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -414,6 +442,9 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
+
+; CHECK-M: __sync_fetch_and_umax_8
+
   %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }

From dccfafaf8cc1085724331a8427f656d7636679ba Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Thu, 4 Feb 2021 13:38:38 -0800
Subject: [PATCH 085/244] Revert "[BuildLibcalls, Attrs] Support more variants
 of C++'s new, add attributes for C++'s delete"

Several of the new attributes here were incorrect, and even the ones
that are generally correct were being added even to nobuiltin calls.

This reverts commit bb3f169b59e1c8bd7fd70097532220bbd11e9967.

(cherry picked from commit 1484ad4137b5d627573672bad48b03785f8fdefd)
---
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp   | 42 -------------------
 .../Transforms/InferFunctionAttrs/annotate.ll | 36 +++-------------
 2 files changed, 5 insertions(+), 73 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index f4afa3ad4623..811b9c04906d 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1005,52 +1005,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
-  case LibFunc_ZdlPvRKSt9nothrow_t: // delete(void*, nothrow)
-  case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: // delete(void*, align_val_t, nothrow)
-  case LibFunc_ZdaPvRKSt9nothrow_t: // delete[](void*, nothrow)
-  case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: // delete[](void*, align_val_t, nothrow)
-    Changed |= setDoesNotThrow(F);
-    LLVM_FALLTHROUGH;
-  case LibFunc_ZdlPv: // delete(void*)
-  case LibFunc_ZdlPvj: // delete(void*, unsigned int)
-  case LibFunc_ZdlPvm: // delete(void*, unsigned long)
-  case LibFunc_ZdaPv: // delete[](void*)
-  case LibFunc_ZdaPvj: // delete[](void*, unsigned int)
-  case LibFunc_ZdaPvm: // delete[](void*, unsigned long)
-  case LibFunc_ZdlPvSt11align_val_t: // delete(void*, align_val_t)
-  case LibFunc_ZdlPvjSt11align_val_t: // delete(void*, unsigned int, align_val_t)
-  case LibFunc_ZdlPvmSt11align_val_t: // delete(void*, unsigned long, align_val_t)
-  case LibFunc_ZdaPvSt11align_val_t: // delete[](void*, align_val_t)
-  case LibFunc_ZdaPvjSt11align_val_t: // delete[](void*, unsigned int, align_val_t)
-  case LibFunc_ZdaPvmSt11align_val_t: // delete[](void*, unsigned long, align_val_t);
-    Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
-    Changed |= setArgsNoUndef(F);
-    Changed |= setWillReturn(F);
-    Changed |= setDoesNotCapture(F, 0);
-    return Changed;
-  case LibFunc_ZnwjRKSt9nothrow_t: // new(unsigned int, nothrow)
-  case LibFunc_ZnwmRKSt9nothrow_t: // new(unsigned long, nothrow)
-  case LibFunc_ZnajRKSt9nothrow_t: // new[](unsigned int, nothrow)
-  case LibFunc_ZnamRKSt9nothrow_t: // new[](unsigned long, nothrow)
-  case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: // new(unsigned int, align_val_t, nothrow)
-  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: // new(unsigned long, align_val_t, nothrow)
-  case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: // new[](unsigned int, align_val_t, nothrow)
-  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: // new[](unsigned long, align_val_t, nothrow)
-    // Nothrow operator new may return null pointer
-    Changed |= setDoesNotThrow(F);
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    Changed |= setRetNoUndef(F);
-    Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
-    return Changed;
   case LibFunc_Znwj: // new(unsigned int)
   case LibFunc_Znwm: // new(unsigned long)
   case LibFunc_Znaj: // new[](unsigned int)
   case LibFunc_Znam: // new[](unsigned long)
-  case LibFunc_ZnwjSt11align_val_t: // new(unsigned int, align_val_t)
-  case LibFunc_ZnwmSt11align_val_t: // new(unsigned long, align_val_t)
-  case LibFunc_ZnajSt11align_val_t: // new[](unsigned int, align_val_t)
-  case LibFunc_ZnamSt11align_val_t: // new[](unsigned long, align_val_t)
   case LibFunc_msvc_new_int: // new(unsigned int)
   case LibFunc_msvc_new_longlong: // new(unsigned long long)
   case LibFunc_msvc_new_array_int: // new[](unsigned int)
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
index a847db7eb550..0af18151d6f6 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -9,30 +9,6 @@ declare i8* @_Znwj(i64 )
 ; CHECK: declare noalias noundef nonnull i8* @_Znwj(i64) [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN:#[0-9]+]]
 declare i8* @_Znwm(i64)
 ; CHECK: declare noalias noundef nonnull i8* @_Znwm(i64) [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN]]
-declare i8* @_Znaj(i64)
-; CHECK: declare noalias noundef nonnull i8* @_Znaj(i64) [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN]]
-declare i8* @_Znam(i64)
-; CHECK: declare noalias noundef nonnull i8* @_Znam(i64) [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN]]
-
-
-%"struct.std::nothrow_t" = type { i8 }
-declare i8* @_ZnwmRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*)
-; CHECK: declare noalias noundef i8* @_ZnwmRKSt9nothrow_t(i64, %"struct.std::nothrow_t"*) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]]
-
-
-; operator delete routines
-declare void @_ZdlPv(i8*)
-; CHECK: declare void @_ZdlPv(i8* nocapture noundef) [[INACCESSIBLEMEMORARGONLY_WILLRETURN:#[0-9]+]]
-declare void @_ZdlPvj(i8*, i64)
-; CHECK: declare void @_ZdlPvj(i8* nocapture noundef, i64 noundef) [[INACCESSIBLEMEMORARGONLY_WILLRETURN]]
-declare void @_ZdlPvm(i8*, i64)
-; CHECK: declare void @_ZdlPvm(i8* nocapture noundef, i64 noundef) [[INACCESSIBLEMEMORARGONLY_WILLRETURN]]
-declare void @_ZdaPv(i8*)
-; CHECK: declare void @_ZdaPv(i8* nocapture noundef) [[INACCESSIBLEMEMORARGONLY_WILLRETURN]]
-declare void @_ZdaPvj(i8*, i64)
-; CHECK: declare void @_ZdaPvj(i8* nocapture noundef, i64 noundef) [[INACCESSIBLEMEMORARGONLY_WILLRETURN]]
-declare void @_ZdaPvm(i8*, i64)
-; CHECK: declare void @_ZdaPvm(i8* nocapture noundef, i64 noundef) [[INACCESSIBLEMEMORARGONLY_WILLRETURN]]
 
 declare i32 @__nvvm_reflect(i8*)
 ; CHECK-NVPTX: declare noundef i32 @__nvvm_reflect(i8* noundef) [[NOFREE_NOUNWIND_READNONE:#[0-9]+]]
@@ -280,7 +256,7 @@ declare void @bcopy(i8*, i8*, i64)
 ; CHECK: declare void @bzero(i8* nocapture writeonly, i64)  [[ARGMEMONLY_NOFREE_NOUNWIND:#[0-9]+]]
 declare void @bzero(i8*, i64)
 
-; CHECK: declare noalias noundef i8* @calloc(i64, i64) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN]]
+; CHECK: declare noalias noundef i8* @calloc(i64, i64) [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN:#[0-9]+]]
 declare i8* @calloc(i64, i64)
 
 ; CHECK: declare double @cbrt(double) [[NOFREE_NOUNWIND_WILLRETURN]]
@@ -478,7 +454,7 @@ declare i32 @fputs(i8*, %opaque*)
 ; CHECK: declare noundef i64 @fread(i8* nocapture noundef, i64 noundef, i64 noundef, %opaque* nocapture noundef) [[NOFREE_NOUNWIND]]
 declare i64 @fread(i8*, i64, i64, %opaque*)
 
-; CHECK: declare void @free(i8* nocapture noundef) [[INACCESSIBLEMEMORARGONLY_NOUNWIND_WILLRETURN:#[0-9]+]]
+; CHECK: declare void @free(i8* nocapture noundef) [[NOUNWIND:#[0-9]+]]
 declare void @free(i8*)
 
 ; CHECK: declare double @frexp(double, i32* nocapture) [[NOFREE_NOUNWIND_WILLRETURN]]
@@ -757,7 +733,7 @@ declare i64 @read(i32, i8*, i64)
 ; CHECK: declare noundef i64 @readlink(i8* nocapture noundef readonly, i8* nocapture noundef, i64 noundef) [[NOFREE_NOUNWIND]]
 declare i64 @readlink(i8*, i8*, i64)
 
-; CHECK: declare noalias noundef i8* @realloc(i8* nocapture, i64) [[INACCESSIBLEMEMORARGONLY_NOUNWIND_WILLRETURN]]
+; CHECK: declare noalias noundef i8* @realloc(i8* nocapture, i64) [[NOUNWIND]]
 declare i8* @realloc(i8*, i64)
 
 ; CHECK: declare noundef i8* @reallocf(i8*, i64)
@@ -1042,10 +1018,9 @@ declare i64 @write(i32, i8*, i64)
 declare void @memset_pattern16(i8*, i8*, i64)
 
 ; CHECK-DAG-UNKNOWN: attributes [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN]] = { inaccessiblememonly nofree willreturn }
-; CHECK-DAG-UNKNOWN: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { inaccessiblememonly nofree nounwind willreturn }
-; CHECK-DAG-UNKNOWN: attributes [[INACCESSIBLEMEMORARGONLY_WILLRETURN]] = { inaccessiblemem_or_argmemonly willreturn }
 ; CHECK-DAG-UNKNOWN: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn }
 ; CHECK-DAG-UNKNOWN: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind }
+; CHECK-DAG-UNKNOWN: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { inaccessiblememonly nofree nounwind willreturn }
 ; CHECK-DAG-UNKNOWN: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]]  = { nofree nounwind readonly willreturn }
 ; CHECK-DAG-UNKNOWN: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly nofree nounwind willreturn }
 ; CHECK-DAG-UNKNOWN: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind readonly }
@@ -1057,11 +1032,10 @@ declare void @memset_pattern16(i8*, i8*, i64)
 ; CHECK-DAG-UNKNOWN: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN]]  = { inaccessiblemem_or_argmemonly nofree nounwind willreturn }
 
 ; CHECK-DAG-LINUX: attributes [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN]] = { inaccessiblememonly nofree willreturn }
-; CHECK-DAG-LINUX: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { inaccessiblememonly nofree nounwind willreturn }
-; CHECK-DAG-LINUX: attributes [[INACCESSIBLEMEMORARGONLY_WILLRETURN]] = { inaccessiblemem_or_argmemonly willreturn }
 ; CHECK-DAG-LINUX: attributes [[NOFREE]] = { nofree }
 ; CHECK-DAG-LINUX: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn }
 ; CHECK-DAG-LINUX: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind }
+; CHECK-DAG-LINUX: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { inaccessiblememonly nofree nounwind willreturn }
 ; CHECK-DAG-LINUX: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]]  = { nofree nounwind readonly willreturn }
 ; CHECK-DAG-LINUX: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly nofree nounwind readonly willreturn }
 ; CHECK-DAG-LINUX: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly nofree nounwind willreturn }

From 97dd9224f1033b1d6313ae80047e48ff964ca77d Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Thu, 4 Feb 2021 13:55:28 -0800
Subject: [PATCH 086/244] Don't infer attributes on '::operator new'.

These attributes were all incorrect or inappropriate for LLVM to infer:
- inaccessiblememonly is generally wrong; user replacement operator new
  can access memory that's visible to the caller, as can a new_handler
  function.
- willreturn is generally wrong; a custom new_handler is not guaranteed
  to terminate.
- noalias is inappropriate: Clang has a flag to determine whether this
  attribute should be present and adds it itself when appropriate.
- noundef and nonnull on the return value should be specified by the
  frontend on all 'operator new' functions if we want them, not here.

In any case, inferring attributes on functions declared 'nobuiltin' (as
these are when Clang emits them) seems questionable.

(cherry picked from commit ab243efb261ba7e27f4b14e1a6fbbff15a79c0bf)
---
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp   | 25 -------------------
 .../Transforms/InferFunctionAttrs/annotate.ll |  6 -----
 2 files changed, 31 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 811b9c04906d..dba5403f272a 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -170,16 +170,6 @@ static bool setRetAndArgsNoUndef(Function &F) {
   return setRetNoUndef(F) | setArgsNoUndef(F);
 }
 
-static bool setRetNonNull(Function &F) {
-  assert(F.getReturnType()->isPointerTy() &&
-         "nonnull applies only to pointers");
-  if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
-    return false;
-  F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
-  ++NumNonNull;
-  return true;
-}
-
 static bool setReturnedArg(Function &F, unsigned ArgNo) {
   if (F.hasParamAttribute(ArgNo, Attribute::Returned))
     return false;
@@ -1005,21 +995,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
     Changed |= setDoesNotCapture(F, 0);
     Changed |= setDoesNotCapture(F, 1);
     return Changed;
-  case LibFunc_Znwj: // new(unsigned int)
-  case LibFunc_Znwm: // new(unsigned long)
-  case LibFunc_Znaj: // new[](unsigned int)
-  case LibFunc_Znam: // new[](unsigned long)
-  case LibFunc_msvc_new_int: // new(unsigned int)
-  case LibFunc_msvc_new_longlong: // new(unsigned long long)
-  case LibFunc_msvc_new_array_int: // new[](unsigned int)
-  case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
-    Changed |= setOnlyAccessesInaccessibleMemory(F);
-    // Operator new always returns a nonnull noalias pointer
-    Changed |= setRetNoUndef(F);
-    Changed |= setRetNonNull(F);
-    Changed |= setRetDoesNotAlias(F);
-    Changed |= setWillReturn(F);
-    return Changed;
   // TODO: add LibFunc entries for:
   // case LibFunc_memset_pattern4:
   // case LibFunc_memset_pattern8:
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
index 0af18151d6f6..5c6ec0b683ca 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -4,12 +4,6 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -inferattrs -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-LINUX %s
 ; RUN: opt < %s -mtriple=nvptx -inferattrs -S | FileCheck -check-prefix=CHECK-NVPTX %s
 
-; operator new routines
-declare i8* @_Znwj(i64 )
-; CHECK: declare noalias noundef nonnull i8* @_Znwj(i64) [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN:#[0-9]+]]
-declare i8* @_Znwm(i64)
-; CHECK: declare noalias noundef nonnull i8* @_Znwm(i64) [[INACCESSIBLEMEMONLY_NOFREE_WILLRETURN]]
-
 declare i32 @__nvvm_reflect(i8*)
 ; CHECK-NVPTX: declare noundef i32 @__nvvm_reflect(i8* noundef) [[NOFREE_NOUNWIND_READNONE:#[0-9]+]]
 ; CHECK-NVPTX: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nounwind readnone }

From 3287b6f9d552f0c542df1c7c0504aad24faf4c53 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 1 Feb 2021 23:53:54 -0800
Subject: [PATCH 087/244] [RISCV] Replace NoX0 SDNodeXForm with a
 ComplexPattern to do the selection of the VL operand.

I think this is a more standard way of doing this.

Reviewed By: rogfer01

Differential Revision: https://reviews.llvm.org/D95833

(cherry picked from commit e7f9a834996f40be8dc46a0b059aa850f1f4ef05)
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  17 ++
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 +
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    | 199 +++++++++---------
 3 files changed, 114 insertions(+), 104 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 7b0f38671f06..517e714cb59f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -946,6 +946,23 @@ bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
   return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
 }
 
+// Selects the VL operand; X0 has special meaning for vsetvl/vsetvli:
+//  rd | rs1 |   AVL value | Effect on vl
+//--------------------------------------------------------------
+// !X0 |  X0 |       VLMAX | Set vl to VLMAX
+//  X0 |  X0 | Value in vl | Keep current vl, just change vtype.
+bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
+  // If the VL value is a constant 0, manually select it to an ADDI with 0
+  // immediate to prevent the default selection path from matching it to X0.
+  auto *C = dyn_cast<ConstantSDNode>(N);
+  if (C && C->isNullValue())
+    VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0);
+  else
+    VL = N; // any other value is passed through unchanged
+
+  return true; // this selector never rejects the operand
+}
+
 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
   if (N.getOpcode() != ISD::SPLAT_VECTOR &&
       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 23601c3b8f06..0c58c5379e13 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -51,6 +51,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   bool MatchSROIW(SDNode *N) const;
   bool MatchSLLIUW(SDNode *N) const;
 
+  bool selectVLOp(SDValue N, SDValue &VL);
+
   bool selectVSplat(SDValue N, SDValue &SplatVal);
   bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
   bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 9fdfc2727d86..fa17f2d87eff 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -42,17 +42,7 @@ def riscv_read_vl : SDNode<"RISCVISD::READ_VL",
 //--------------------------------------------------------------
 // !X0 |  X0 |       VLMAX | Set vl to VLMAX
 //  X0 |  X0 | Value in vl | Keep current vl, just change vtype.
-def NoX0 : SDNodeXForm<undef,
-[{
-  auto *C = dyn_cast<ConstantSDNode>(N);
-  if (C && C->isNullValue()) {
-    SDLoc DL(N);
-    return SDValue(CurDAG->getMachineNode(RISCV::ADDI, DL, Subtarget->getXLenVT(),
-                   CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()),
-                   CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT())), 0);
-  }
-  return SDValue(N, 0);
-}]>;
+def VLOp : ComplexPattern<XLenVT, 1, "selectVLOp">;
 
 def DecImm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue() - 1, SDLoc(N),
@@ -1951,10 +1941,10 @@ class VPatUnaryNoMask<string intrinsic_name,
                       VReg op2_reg_class> :
   Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
                    (op2_type op2_reg_class:$rs2),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                    (op2_type op2_reg_class:$rs2),
-                   (NoX0 GPR:$vl), sew)>;
+                   GPR:$vl, sew)>;
 
 class VPatUnaryMask<string intrinsic_name,
                     string inst,
@@ -1970,21 +1960,21 @@ class VPatUnaryMask<string intrinsic_name,
                    (result_type result_reg_class:$merge),
                    (op2_type op2_reg_class:$rs2),
                    (mask_type V0),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
                    (result_type result_reg_class:$merge),
                    (op2_type op2_reg_class:$rs2),
-                   (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                   (mask_type V0), GPR:$vl, sew)>;
 
 class VPatMaskUnaryNoMask<string intrinsic_name,
                           string inst,
                           MTypeInfo mti> :
   Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name)
                 (mti.Mask VR:$rs2),
-                (XLenVT GPR:$vl))),
+                (XLenVT (VLOp GPR:$vl)))),
                 (!cast<Instruction>(inst#"_M_"#mti.BX)
                 (mti.Mask VR:$rs2),
-                (NoX0 GPR:$vl), mti.SEW)>;
+                GPR:$vl, mti.SEW)>;
 
 class VPatMaskUnaryMask<string intrinsic_name,
                         string inst,
@@ -1993,11 +1983,11 @@ class VPatMaskUnaryMask<string intrinsic_name,
                 (mti.Mask VR:$merge),
                 (mti.Mask VR:$rs2),
                 (mti.Mask V0),
-                (XLenVT GPR:$vl))),
+                (XLenVT (VLOp GPR:$vl)))),
                 (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK")
                 (mti.Mask VR:$merge),
                 (mti.Mask VR:$rs2),
-                (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
+                (mti.Mask V0), GPR:$vl, mti.SEW)>;
 
 class VPatUnaryAnyMask<string intrinsic,
                        string inst,
@@ -2013,12 +2003,12 @@ class VPatUnaryAnyMask<string intrinsic,
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (mask_type VR:$rs2),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (mask_type VR:$rs2),
-                   (NoX0 GPR:$vl), sew)>;
+                   GPR:$vl, sew)>;
 
 class VPatBinaryNoMask<string intrinsic_name,
                        string inst,
@@ -2031,11 +2021,11 @@ class VPatBinaryNoMask<string intrinsic_name,
   Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst)
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (NoX0 GPR:$vl), sew)>;
+                   GPR:$vl, sew)>;
 
 class VPatBinaryMask<string intrinsic_name,
                      string inst,
@@ -2052,12 +2042,12 @@ class VPatBinaryMask<string intrinsic_name,
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
                    (mask_type V0),
-                   (XLenVT GPR:$vl))),
+                   (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_MASK")
                    (result_type result_reg_class:$merge),
                    (op1_type op1_reg_class:$rs1),
                    (op2_type op2_kind:$rs2),
-                   (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                   (mask_type V0), GPR:$vl, sew)>;
 
 class VPatTernaryNoMask<string intrinsic,
                         string inst,
@@ -2075,12 +2065,12 @@ class VPatTernaryNoMask<string intrinsic,
                     (result_type result_reg_class:$rs3),
                     (op1_type op1_reg_class:$rs1),
                     (op2_type op2_kind:$rs2),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                     result_reg_class:$rs3,
                     (op1_type op1_reg_class:$rs1),
                     op2_kind:$rs2,
-                    (NoX0 GPR:$vl), sew)>;
+                    GPR:$vl, sew)>;
 
 class VPatTernaryMask<string intrinsic,
                       string inst,
@@ -2099,13 +2089,13 @@ class VPatTernaryMask<string intrinsic,
                     (op1_type op1_reg_class:$rs1),
                     (op2_type op2_kind:$rs2),
                     (mask_type V0),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
                     result_reg_class:$rs3,
                     (op1_type op1_reg_class:$rs1),
                     op2_kind:$rs2,
                     (mask_type V0),
-                    (NoX0 GPR:$vl), sew)>;
+                    GPR:$vl, sew)>;
 
 class VPatAMOWDNoMask<string intrinsic_name,
                     string inst,
@@ -2119,10 +2109,10 @@ class VPatAMOWDNoMask<string intrinsic_name,
                     GPR:$rs1,
                     (op1_type op1_reg_class:$vs2),
                     (result_type vlmul.vrclass:$vd),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
                     $rs1, $vs2, $vd,
-                    (NoX0 GPR:$vl), sew)>;
+                    GPR:$vl, sew)>;
 
 class VPatAMOWDMask<string intrinsic_name,
                     string inst,
@@ -2138,10 +2128,10 @@ class VPatAMOWDMask<string intrinsic_name,
                     (op1_type op1_reg_class:$vs2),
                     (result_type vlmul.vrclass:$vd),
                     (mask_type V0),
-                    (XLenVT GPR:$vl))),
+                    (XLenVT (VLOp GPR:$vl)))),
                    (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
                     $rs1, $vs2, $vd,
-                    (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                    (mask_type V0), GPR:$vl, sew)>;
 
 multiclass VPatUSLoad<string intrinsic,
                       string inst,
@@ -2153,14 +2143,14 @@ multiclass VPatUSLoad<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(type (Intr GPR:$rs1, GPR:$vl)),
-                    (Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (Intr GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
     def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
-                               GPR:$rs1, (mask_type V0), GPR:$vl)),
+                               GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatUSLoadFF<string inst,
@@ -2171,13 +2161,13 @@ multiclass VPatUSLoadFF<string inst,
                         VReg reg_class>
 {
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(type (riscv_vleff GPR:$rs1, GPR:$vl)),
-                    (Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (riscv_vleff GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, GPR:$vl, sew)>;
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
     def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge),
-                                      GPR:$rs1, (mask_type V0), GPR:$vl)),
+                                      GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatSLoad<string intrinsic,
@@ -2190,14 +2180,14 @@ multiclass VPatSLoad<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, GPR:$vl)),
-                    (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
     def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
-                               GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl)),
+                               GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatILoad<string intrinsic,
@@ -2213,16 +2203,16 @@ multiclass VPatILoad<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
-    def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), GPR:$vl)),
-                    (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl)))),
+                    (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
 
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
     def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
                                GPR:$rs1, (idx_type idx_reg_class:$rs2),
-                               (mask_type V0), GPR:$vl)),
+                               (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
                     (PseudoMask $merge,
-                                $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                                $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatUSStore<string intrinsic,
@@ -2235,12 +2225,12 @@ multiclass VPatUSStore<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$vl),
-                    (Pseudo $rs3, $rs1, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+                    (Pseudo $rs3, $rs1, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
-    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), GPR:$vl),
-              (PseudoMask $rs3, $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+              (PseudoMask $rs3, $rs1, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatSStore<string intrinsic,
@@ -2253,12 +2243,12 @@ multiclass VPatSStore<string intrinsic,
 {
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
-    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, GPR:$vl),
-                    (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+    def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl))),
+                    (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
-    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl),
-              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+    def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatIStore<string intrinsic,
@@ -2275,13 +2265,13 @@ multiclass VPatIStore<string intrinsic,
     defvar Intr = !cast<Intrinsic>(intrinsic);
     defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
     def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1,
-                    (idx_type idx_reg_class:$rs2), GPR:$vl),
-              (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+                    (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl))),
+              (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
     defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
     defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
     def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1,
-                        (idx_type idx_reg_class:$rs2), (mask_type V0), GPR:$vl),
-              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                        (idx_type idx_reg_class:$rs2), (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+              (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatUnaryS_M<string intrinsic_name,
@@ -2289,13 +2279,13 @@ multiclass VPatUnaryS_M<string intrinsic_name,
 {
   foreach mti = AllMasks in {
     def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name)
-                      (mti.Mask VR:$rs1), GPR:$vl)),
+                      (mti.Mask VR:$rs1), (XLenVT (VLOp GPR:$vl)))),
                       (!cast<Instruction>(inst#"_M_"#mti.BX) $rs1,
-                      (NoX0 GPR:$vl), mti.SEW)>;
+                      GPR:$vl, mti.SEW)>;
     def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name # "_mask")
-                      (mti.Mask VR:$rs1), (mti.Mask V0), GPR:$vl)),
+                      (mti.Mask VR:$rs1), (mti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
                       (!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") $rs1,
-                      (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
+                      (mti.Mask V0), GPR:$vl, mti.SEW)>;
   }
 }
 
@@ -2360,24 +2350,24 @@ multiclass VPatNullaryV<string intrinsic, string instruction>
 {
   foreach vti = AllIntegerVectors in {
     def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
-                          (XLenVT GPR:$vl))),
+                          (XLenVT (VLOp GPR:$vl)))),
                           (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX)
-                          (NoX0 GPR:$vl), vti.SEW)>;
+                          GPR:$vl, vti.SEW)>;
     def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask")
                           (vti.Vector vti.RegClass:$merge),
-                          (vti.Mask V0), (XLenVT GPR:$vl))),
+                          (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
                           (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK")
                           vti.RegClass:$merge, (vti.Mask V0),
-                          (NoX0 GPR:$vl), vti.SEW)>;
+                          GPR:$vl, vti.SEW)>;
   }
 }
 
 multiclass VPatNullaryM<string intrinsic, string inst> {
   foreach mti = AllMasks in
     def : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic)
-                        (XLenVT GPR:$vl))),
+                        (XLenVT (VLOp GPR:$vl)))),
                         (!cast<Instruction>(inst#"_M_"#mti.BX)
-                        (NoX0 GPR:$vl), mti.SEW)>;
+                        GPR:$vl, mti.SEW)>;
 }
 
 multiclass VPatBinary<string intrinsic,
@@ -2414,11 +2404,11 @@ multiclass VPatBinaryCarryIn<string intrinsic,
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
                          (mask_type V0),
-                         (XLenVT GPR:$vl))),
+                         (XLenVT (VLOp GPR:$vl)))),
                          (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
-                         (mask_type V0), (NoX0 GPR:$vl), sew)>;
+                         (mask_type V0), GPR:$vl, sew)>;
 }
 
 multiclass VPatBinaryMaskOut<string intrinsic,
@@ -2435,11 +2425,11 @@ multiclass VPatBinaryMaskOut<string intrinsic,
   def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
-                         (XLenVT GPR:$vl))),
+                         (XLenVT (VLOp GPR:$vl)))),
                          (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
                          (op1_type op1_reg_class:$rs1),
                          (op2_type op2_kind:$rs2),
-                         (NoX0 GPR:$vl), sew)>;
+                         GPR:$vl, sew)>;
 }
 
 multiclass VPatConversion<string intrinsic,
@@ -3886,62 +3876,63 @@ defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>
 // instruction.
 foreach vti = AllIntegerVectors in {
   def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1),
-                                       (vti.Scalar simm5_plus1:$rs2), GPR:$vl)),
+                                       (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
                                                                (DecImm simm5_plus1:$rs2),
-                                                               (NoX0 GPR:$vl),
+                                                               GPR:$vl,
                                                                vti.SEW)>;
   def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0),
                                             (vti.Vector vti.RegClass:$rs1),
                                             (vti.Scalar simm5_plus1:$rs2),
                                             (vti.Mask VR:$merge),
-                                            GPR:$vl)),
+                                            (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
                                                       VR:$merge,
                                                       vti.RegClass:$rs1,
                                                       (DecImm simm5_plus1:$rs2),
                                                       (vti.Mask V0),
-                                                      (NoX0 GPR:$vl),
+                                                      GPR:$vl,
                                                       vti.SEW)>;
 
-  def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
-                                        (vti.Scalar simm5_plus1:$rs2), GPR:$vl)),
+ def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
+                                        (vti.Scalar simm5_plus1:$rs2),
+                                        (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
                                                                 (DecImm simm5_plus1:$rs2),
-                                                                (NoX0 GPR:$vl),
+                                                                GPR:$vl,
                                                                 vti.SEW)>;
   def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
                                              (vti.Vector vti.RegClass:$rs1),
                                              (vti.Scalar simm5_plus1:$rs2),
                                              (vti.Mask VR:$merge),
-                                             GPR:$vl)),
+                                             (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
                                                       VR:$merge,
                                                       vti.RegClass:$rs1,
                                                       (DecImm simm5_plus1:$rs2),
                                                       (vti.Mask V0),
-                                                      (NoX0 GPR:$vl),
+                                                      GPR:$vl,
                                                       vti.SEW)>;
 
   // Special cases to avoid matching vmsltu.vi 0 (always false) to
   // vmsleu.vi -1 (always true). Instead match to vmsne.vv.
   def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
-                                        (vti.Scalar 0), GPR:$vl)),
+                                        (vti.Scalar 0), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
                                                                vti.RegClass:$rs1,
-                                                               (NoX0 GPR:$vl),
+                                                               GPR:$vl,
                                                                vti.SEW)>;
   def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
                                             (vti.Vector vti.RegClass:$rs1),
                                             (vti.Scalar 0),
                                             (vti.Mask VR:$merge),
-                                            GPR:$vl)),
+                                            (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
                                                      VR:$merge,
                                                      vti.RegClass:$rs1,
                                                      vti.RegClass:$rs1,
                                                      (vti.Mask V0),
-                                                     (NoX0 GPR:$vl),
+                                                     GPR:$vl,
                                                      vti.SEW)>;
 }
 
@@ -4002,18 +3993,18 @@ defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">;
 //===----------------------------------------------------------------------===//
 foreach vti = AllVectors in {
   def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1),
-                                           GPR:$vl)),
+                                           (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
-             $rs1, (NoX0 GPR:$vl), vti.SEW)>;
+             $rs1, GPR:$vl, vti.SEW)>;
 }
 
 foreach vti = AllIntegerVectors in {
-  def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, GPR:$vl)),
+  def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
-             $rs2, (NoX0 GPR:$vl), vti.SEW)>;
-  def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, GPR:$vl)),
+             $rs2, GPR:$vl, vti.SEW)>;
+  def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
-             simm5:$imm5, (NoX0 GPR:$vl), vti.SEW)>;
+             simm5:$imm5, GPR:$vl, vti.SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4157,8 +4148,8 @@ foreach fvti = AllFloatVectors in {
   defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
   def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2),
                                             (fvti.Scalar (fpimm0)),
-                                            (fvti.Mask V0), (XLenVT GPR:$vl))),
-            (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), (NoX0 GPR:$vl), fvti.SEW)>;
+                                            (fvti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
+            (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4167,16 +4158,16 @@ foreach fvti = AllFloatVectors in {
 foreach fvti = AllFloatVectors in {
   // If we're splatting fpimm0, use vmv.v.x vd, x0.
   def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
-                         (fvti.Scalar (fpimm0)), GPR:$vl)),
+                         (fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
-             0, (NoX0 GPR:$vl), fvti.SEW)>;
+             0, GPR:$vl, fvti.SEW)>;
 
   def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
-                         (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)),
+                         (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
                                 fvti.LMul.MX)
              (fvti.Scalar fvti.ScalarRegClass:$rs2),
-             (NoX0 GPR:$vl), fvti.SEW)>;
+             GPR:$vl, fvti.SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4321,9 +4312,9 @@ foreach vti = AllIntegerVectors in {
   def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)),
             (!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.SEW)>;
   def : Pat<(vti.Vector (int_riscv_vmv_s_x (vti.Vector vti.RegClass:$rs1),
-                                           GPR:$rs2, GPR:$vl)),
+                                           GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMV_S_X_" # vti.LMul.MX)
-             (vti.Vector $rs1), $rs2, (NoX0 GPR:$vl), vti.SEW)>;
+             (vti.Vector $rs1), $rs2, GPR:$vl, vti.SEW)>;
 }
 } // Predicates = [HasStdExtV]
 
@@ -4339,12 +4330,12 @@ foreach fvti = AllFloatVectors in {
                          (instr $rs2, fvti.SEW)>;
 
   def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
-                         (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)),
+                         (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" #
                                 fvti.LMul.MX)
              (fvti.Vector $rs1),
              (fvti.Scalar fvti.ScalarRegClass:$rs2),
-             (NoX0 GPR:$vl), fvti.SEW)>;
+             GPR:$vl, fvti.SEW)>;
 }
 } // Predicates = [HasStdExtV, HasStdExtF]
 

From ef27138bb6b59ac28b28efdd1e192724ad94a1fa Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <kai.wang@sifive.com>
Date: Mon, 1 Feb 2021 16:08:46 +0800
Subject: [PATCH 088/244] [RISCV] Add new vector instructions in v0.10.

* Add new vector instructions in v0.10.
 - load/store for mask value vle1.v vse1.v
 - vsetivli for 0-31 immediate vector length.
* Rename vector instructions in v0.10.
 - vfrsqrte7 -> vfrsqrt7
 - vfrece7 -> vfrec7
* Reserve memory width encodings for EEW>128b.

Differential Revision: https://reviews.llvm.org/D95781

(cherry picked from commit c7189ba78578d029e0162720319de3c1c6fc348b)
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |    7 +-
 llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp |   37 +-
 .../Target/RISCV/RISCVExpandPseudoInsts.cpp   |   11 +-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |   14 +-
 llvm/lib/Target/RISCV/RISCVInstrFormatsV.td   |   23 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td      |   98 +-
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |   38 +-
 .../CodeGen/RISCV/rvv/cleanup-vsetivli.mir    |   46 +
 .../RISCV/rvv/rv32-vsetvli-intrinsics.ll      |    3 +-
 .../RISCV/rvv/rv64-vsetvli-intrinsics.ll      |    3 +-
 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll    |  602 ++++++
 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll    |  602 ++++++
 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll  |  602 ++++++
 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll  |  602 ++++++
 llvm/test/CodeGen/RISCV/rvv/vle1-rv32.ll      |   94 +
 llvm/test/CodeGen/RISCV/rvv/vle1-rv64.ll      |   94 +
 llvm/test/CodeGen/RISCV/rvv/vse1-rv32.ll      |   94 +
 llvm/test/CodeGen/RISCV/rvv/vse1-rv64.ll      |   94 +
 llvm/test/CodeGen/RISCV/vfrece7-rv32.ll       |  602 ------
 llvm/test/CodeGen/RISCV/vfrece7-rv64.ll       |  602 ------
 llvm/test/CodeGen/RISCV/vfrsqrte7-rv32.ll     |  602 ------
 llvm/test/CodeGen/RISCV/vfrsqrte7-rv64.ll     |  602 ------
 llvm/test/MC/RISCV/rvv/fothers.s              |   16 +-
 llvm/test/MC/RISCV/rvv/invalid.s              |    9 +
 llvm/test/MC/RISCV/rvv/load.s                 |  246 +--
 llvm/test/MC/RISCV/rvv/store.s                |  102 +-
 llvm/test/MC/RISCV/rvv/vsetvl.s               |   18 +
 llvm/test/MC/RISCV/rvv/zvlsseg.s              | 1680 -----------------
 28 files changed, 3003 insertions(+), 4540 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vle1-rv32.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vle1-rv64.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vse1-rv32.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vse1-rv64.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/vfrece7-rv32.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/vfrece7-rv64.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/vfrsqrte7-rv32.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/vfrsqrte7-rv64.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index ab5b09b72ac3..c4056895f68e 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -790,6 +790,9 @@ let TargetPrefix = "riscv" in {
   defm vsoxei : RISCVIStore;
   defm vsuxei : RISCVIStore;
 
+  def int_riscv_vle1 : RISCVUSLoad;
+  def int_riscv_vse1 : RISCVUSStore;
+
   defm vamoswap : RISCVAMO;
   defm vamoadd : RISCVAMO;
   defm vamoxor : RISCVAMO;
@@ -940,8 +943,8 @@ let TargetPrefix = "riscv" in {
   defm vfwnmsac : RISCVTernaryWide;
 
   defm vfsqrt : RISCVUnaryAA;
-  defm vfrsqrte7 : RISCVUnaryAA;
-  defm vfrece7 : RISCVUnaryAA;
+  defm vfrsqrt7 : RISCVUnaryAA;
+  defm vfrec7 : RISCVUnaryAA;
 
   defm vfmin : RISCVBinaryAAX;
   defm vfmax : RISCVBinaryAAX;
diff --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
index 6a12f99b8903..ae32cbd1ae59 100644
--- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
@@ -59,7 +59,8 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
     MachineInstr &MI = *MII++;
 
-    if (MI.getOpcode() != RISCV::PseudoVSETVLI) {
+    if (MI.getOpcode() != RISCV::PseudoVSETVLI &&
+        MI.getOpcode() != RISCV::PseudoVSETIVLI) {
       if (PrevVSETVLI &&
           (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
            MI.modifiesRegister(RISCV::VTYPE))) {
@@ -69,26 +70,48 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       continue;
     }
 
-    // If we don't have a previous VSETVLI or the VL output isn't dead, we
+    // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we
     // can't remove this VSETVLI.
     if (!PrevVSETVLI || !MI.getOperand(0).isDead()) {
       PrevVSETVLI = &MI;
       continue;
     }
 
-    Register PrevAVLReg = PrevVSETVLI->getOperand(1).getReg();
-    Register AVLReg = MI.getOperand(1).getReg();
+    // If the previous "set vl" instruction has a different opcode, one AVL is
+    // a register and the other an immediate, so they can't be compared.
+    if (PrevVSETVLI->getOpcode() != MI.getOpcode()) {
+      PrevVSETVLI = &MI;
+      continue;
+    }
+
+    // The remaining two cases are
+    // 1. PrevVSETVLI = PseudoVSETVLI
+    //    MI = PseudoVSETVLI
+    //
+    // 2. PrevVSETVLI = PseudoVSETIVLI
+    //    MI = PseudoVSETIVLI
+    Register AVLReg;
+    bool SameAVL = false;
+    if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+      AVLReg = MI.getOperand(1).getReg();
+      SameAVL = PrevVSETVLI->getOperand(1).getReg() == AVLReg;
+    } else { // RISCV::PseudoVSETIVLI
+      SameAVL =
+          PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm();
+    }
     int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm();
     int64_t VTYPEImm = MI.getOperand(2).getImm();
 
-    // Does this VSETVLI use the same AVL register and VTYPE immediate?
-    if (PrevAVLReg != AVLReg || PrevVTYPEImm != VTYPEImm) {
+    // Does this VSET{I}VLI use the same AVL register/value and VTYPE immediate?
+    if (!SameAVL || PrevVTYPEImm != VTYPEImm) {
       PrevVSETVLI = &MI;
       continue;
     }
 
     // If the AVLReg is X0 we need to look at the output VL of both VSETVLIs.
-    if (AVLReg == RISCV::X0) {
+    if ((MI.getOpcode() == RISCV::PseudoVSETVLI) && (AVLReg == RISCV::X0)) {
+      assert((PrevVSETVLI->getOpcode() == RISCV::PseudoVSETVLI) &&
+             "Unexpected vsetvli opcode.");
       Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
       Register OutVL = MI.getOperand(0).getReg();
       // We can't remove if the previous VSETVLI left VL unchanged and the
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 5f50892ca886..ec9a39569952 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -103,6 +103,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case RISCV::PseudoLA_TLS_GD:
     return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
   case RISCV::PseudoVSETVLI:
+  case RISCV::PseudoVSETIVLI:
     return expandVSetVL(MBB, MBBI);
   case RISCV::PseudoVMCLR_M_B1:
   case RISCV::PseudoVMCLR_M_B2:
@@ -217,9 +218,15 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
 
   DebugLoc DL = MBBI->getDebugLoc();
 
-  assert(MBBI->getOpcode() == RISCV::PseudoVSETVLI &&
+  assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+          MBBI->getOpcode() == RISCV::PseudoVSETIVLI) &&
          "Unexpected pseudo instruction");
-  const MCInstrDesc &Desc = TII->get(RISCV::VSETVLI);
+  unsigned Opcode;
+  if (MBBI->getOpcode() == RISCV::PseudoVSETVLI)
+    Opcode = RISCV::VSETVLI;
+  else
+    Opcode = RISCV::VSETIVLI;
+  const MCInstrDesc &Desc = TII->get(Opcode);
   assert(Desc.getNumOperands() == 3 && "Unexpected instruction format");
 
   Register DstReg = MBBI->getOperand(0).getReg();
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 517e714cb59f..2121cc38f661 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -569,12 +569,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
       SDValue VLOperand = Node->getOperand(2);
       if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
-        if (C->isNullValue()) {
-          VLOperand = SDValue(
-              CurDAG->getMachineNode(RISCV::ADDI, DL, XLenVT,
-                                     CurDAG->getRegister(RISCV::X0, XLenVT),
-                                     CurDAG->getTargetConstant(0, DL, XLenVT)),
-              0);
+        uint64_t AVL = C->getZExtValue();
+        if (isUInt<5>(AVL)) {
+          SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+          ReplaceNode(Node,
+                      CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
+                                             MVT::Other, VLImm, VTypeIOp,
+                                             /* Chain */ Node->getOperand(0)));
+          return;
         }
       }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
index 147993127e78..80f46b73bfd7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
@@ -38,9 +38,11 @@ class RISCVLSUMOP<bits<5> val> {
   bits<5> Value = val;
 }
 def LUMOPUnitStride  : RISCVLSUMOP<0b00000>;
+def LUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
 def LUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
 def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>;
 def SUMOPUnitStride  : RISCVLSUMOP<0b00000>;
+def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
 def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
 
 class RISCVAMOOP<bits<5> val> {
@@ -63,10 +65,23 @@ def LSWidth8     : RISCVWidth<0b0000>;
 def LSWidth16    : RISCVWidth<0b0101>;
 def LSWidth32    : RISCVWidth<0b0110>;
 def LSWidth64    : RISCVWidth<0b0111>;
-def LSWidth128   : RISCVWidth<0b1000>;
-def LSWidth256   : RISCVWidth<0b1101>;
-def LSWidth512   : RISCVWidth<0b1110>;
-def LSWidth1024  : RISCVWidth<0b1111>;
+
+class RVInstSetiVLi<dag outs, dag ins, string opcodestr, string argstr>
+    : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+  bits<5> uimm;
+  bits<5> rd;
+  bits<10> vtypei;
+
+  let Inst{31} = 1;
+  let Inst{30} = 1;
+  let Inst{29-20} = vtypei{9-0};
+  let Inst{19-15} = uimm;
+  let Inst{14-12} = 0b111;
+  let Inst{11-7} = rd;
+  let Opcode = OPC_OP_V.Value;
+
+  let Defs = [VTYPE, VL];
+}
 
 class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr>
     : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index e02c9f8bcbe2..86fbc73d81d5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -82,6 +82,12 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
 //===----------------------------------------------------------------------===//
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+// load vd, (rs1)
+class VUnitStrideLoadMask<string opcodestr>
+    : RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0},
+                (outs VR:$vd),
+                (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">;
+
 // load vd, (rs1), vm
 class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width,
                       string opcodestr>
@@ -137,6 +143,12 @@ class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
 } // hasSideEffects = 0, mayLoad = 1, mayStore = 0
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+// unit-stride mask store vs3, (rs1)
+class VUnitStrideStoreMask<string opcodestr>
+    : RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0},
+                (outs), (ins VR:$vs3, GPR:$rs1), opcodestr,
+                "$vs3, (${rs1})">;
+
 // store vd, vs3, (rs1), vm
 class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width,
                          string opcodestr>
@@ -423,10 +435,6 @@ multiclass VWholeLoad<bits<3> nf, string opcodestr> {
   def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v">;
   def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v">;
   def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v">;
-  def E128_V : VWholeLoad<nf, LSWidth128, opcodestr # "e128.v">;
-  def E256_V : VWholeLoad<nf, LSWidth256, opcodestr # "e256.v">;
-  def E512_V : VWholeLoad<nf, LSWidth512, opcodestr # "e512.v">;
-  def E1024_V : VWholeLoad<nf, LSWidth1024, opcodestr # "e1024.v">;
 }
 
 //===----------------------------------------------------------------------===//
@@ -438,6 +446,9 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
 def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei),
                            "vsetvli", "$rd, $rs1, $vtypei">;
 
+def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei),
+                             "vsetivli", "$rd, $uimm, $vtypei">;
+
 def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
                          "vsetvl", "$rd, $rs1, $rs2">;
 } // hasSideEffects = 1, mayLoad = 0, mayStore = 0
@@ -447,47 +458,30 @@ def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">;
 def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">;
 def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">;
 def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">;
-def VLE128_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth128, "vle128.v">;
-def VLE256_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth256, "vle256.v">;
-def VLE512_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth512, "vle512.v">;
-def VLE1024_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth1024, "vle1024.v">;
 
 def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">;
 def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">;
 def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">;
 def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">;
-def VLE128FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth128, "vle128ff.v">;
-def VLE256FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth256, "vle256ff.v">;
-def VLE512FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth512, "vle512ff.v">;
-def VLE1024FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth1024, "vle1024ff.v">;
+
+def VLE1_V : VUnitStrideLoadMask<"vle1.v">;
+def VSE1_V : VUnitStrideStoreMask<"vse1.v">;
 
 def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">;
 def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">;
 def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">;
 def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">;
-def VSE128_V : VUnitStrideStore<SUMOPUnitStride, LSWidth128, "vse128.v">;
-def VSE256_V : VUnitStrideStore<SUMOPUnitStride, LSWidth256, "vse256.v">;
-def VSE512_V : VUnitStrideStore<SUMOPUnitStride, LSWidth512, "vse512.v">;
-def VSE1024_V : VUnitStrideStore<SUMOPUnitStride, LSWidth1024, "vse1024.v">;
 
 // Vector Strided Instructions
 def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">;
 def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">;
 def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">;
 def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">;
-def VLSE128_V : VStridedLoad<LSWidth128, "vlse128.v">;
-def VLSE256_V : VStridedLoad<LSWidth256, "vlse256.v">;
-def VLSE512_V : VStridedLoad<LSWidth512, "vlse512.v">;
-def VLSE1024_V : VStridedLoad<LSWidth1024, "vlse1024.v">;
 
 def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">;
 def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">;
 def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">;
 def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">;
-def VSSE128_V : VStridedStore<LSWidth128, "vsse128.v">;
-def VSSE256_V : VStridedStore<LSWidth256, "vsse256.v">;
-def VSSE512_V : VStridedStore<LSWidth512, "vsse512.v">;
-def VSSE1024_V : VStridedStore<LSWidth1024, "vsse1024.v">;
 
 // Vector Indexed Instructions
 def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">;
@@ -806,8 +800,8 @@ defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
 
 // Vector Floating-Point Square-Root Instruction
 defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
-defm VFRSQRTE7_V : VALU_FV_VS2<"vfrsqrte7.v", 0b010011, 0b00100>;
-defm VFRECE7_V : VALU_FV_VS2<"vfrece7.v", 0b010011, 0b00101>;
+defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
+defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
 
 // Vector Floating-Point MIN/MAX Instructions
 defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
@@ -1058,47 +1052,27 @@ let Predicates = [HasStdExtZvlsseg] in {
     def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">;
     def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">;
     def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">;
-    def VLSEG#nf#E128_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth128, "vlseg"#nf#"e128.v">;
-    def VLSEG#nf#E256_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth256, "vlseg"#nf#"e256.v">;
-    def VLSEG#nf#E512_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth512, "vlseg"#nf#"e512.v">;
-    def VLSEG#nf#E1024_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth1024, "vlseg"#nf#"e1024.v">;
 
     def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">;
     def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">;
     def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">;
     def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">;
-    def VLSEG#nf#E128FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth128, "vlseg"#nf#"e128ff.v">;
-    def VLSEG#nf#E256FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth256, "vlseg"#nf#"e256ff.v">;
-    def VLSEG#nf#E512FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth512, "vlseg"#nf#"e512ff.v">;
-    def VLSEG#nf#E1024FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth1024, "vlseg"#nf#"e1024ff.v">;
 
     def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">;
     def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">;
     def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">;
     def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
-    def VSSEG#nf#E128_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth128, "vsseg"#nf#"e128.v">;
-    def VSSEG#nf#E256_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth256, "vsseg"#nf#"e256.v">;
-    def VSSEG#nf#E512_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth512, "vsseg"#nf#"e512.v">;
-    def VSSEG#nf#E1024_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth1024, "vsseg"#nf#"e1024.v">;
 
     // Vector Strided Instructions
     def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">;
     def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">;
     def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">;
     def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
-    def VLSSEG#nf#E128_V : VStridedSegmentLoad<!add(nf, -1), LSWidth128, "vlsseg"#nf#"e128.v">;
-    def VLSSEG#nf#E256_V : VStridedSegmentLoad<!add(nf, -1), LSWidth256, "vlsseg"#nf#"e256.v">;
-    def VLSSEG#nf#E512_V : VStridedSegmentLoad<!add(nf, -1), LSWidth512, "vlsseg"#nf#"e512.v">;
-    def VLSSEG#nf#E1024_V : VStridedSegmentLoad<!add(nf, -1), LSWidth1024, "vlsseg"#nf#"e1024.v">;
 
     def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">;
     def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">;
     def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">;
     def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
-    def VSSSEG#nf#E128_V : VStridedSegmentStore<!add(nf, -1), LSWidth128, "vssseg"#nf#"e128.v">;
-    def VSSSEG#nf#E256_V : VStridedSegmentStore<!add(nf, -1), LSWidth256, "vssseg"#nf#"e256.v">;
-    def VSSSEG#nf#E512_V : VStridedSegmentStore<!add(nf, -1), LSWidth512, "vssseg"#nf#"e512.v">;
-    def VSSSEG#nf#E1024_V : VStridedSegmentStore<!add(nf, -1), LSWidth1024, "vssseg"#nf#"e1024.v">;
 
     // Vector Indexed Instructions
     def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
@@ -1109,14 +1083,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vluxseg"#nf#"ei32.v">;
     def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
                               LSWidth64, "vluxseg"#nf#"ei64.v">;
-    def VLUXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth128, "vluxseg"#nf#"ei128.v">;
-    def VLUXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth256, "vluxseg"#nf#"ei256.v">;
-    def VLUXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth512, "vluxseg"#nf#"ei512.v">;
-    def VLUXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
-                               LSWidth1024, "vluxseg"#nf#"ei1024.v">;
 
     def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
                              LSWidth8, "vloxseg"#nf#"ei8.v">;
@@ -1126,14 +1092,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vloxseg"#nf#"ei32.v">;
     def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
                               LSWidth64, "vloxseg"#nf#"ei64.v">;
-    def VLOXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth128, "vloxseg"#nf#"ei128.v">;
-    def VLOXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth256, "vloxseg"#nf#"ei256.v">;
-    def VLOXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth512, "vloxseg"#nf#"ei512.v">;
-    def VLOXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
-                               LSWidth1024, "vloxseg"#nf#"ei1024.v">;
 
     def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
                              LSWidth8, "vsuxseg"#nf#"ei8.v">;
@@ -1143,14 +1101,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vsuxseg"#nf#"ei32.v">;
     def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
                               LSWidth64, "vsuxseg"#nf#"ei64.v">;
-    def VSUXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                               LSWidth128, "vsuxseg"#nf#"ei128.v">;
-    def VSUXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                               LSWidth256, "vsuxseg"#nf#"ei256.v">;
-    def VSUXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                               LSWidth512, "vsuxseg"#nf#"ei512.v">;
-    def VSUXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
-                                LSWidth1024, "vsuxseg"#nf#"ei1024.v">;
 
     def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
                              LSWidth8, "vsoxseg"#nf#"ei8.v">;
@@ -1160,14 +1110,6 @@ let Predicates = [HasStdExtZvlsseg] in {
                               LSWidth32, "vsoxseg"#nf#"ei32.v">;
     def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
                               LSWidth64, "vsoxseg"#nf#"ei64.v">;
-    def VSOXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                               LSWidth128, "vsoxseg"#nf#"ei128.v">;
-    def VSOXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                               LSWidth256, "vsoxseg"#nf#"ei256.v">;
-    def VSOXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                               LSWidth512, "vsoxseg"#nf#"ei512.v">;
-    def VSOXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
-                                LSWidth1024, "vsoxseg"#nf#"ei1024.v">;
   }
 } // Predicates = [HasStdExtZvlsseg]
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index fa17f2d87eff..60bd1b24cab8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1218,6 +1218,14 @@ multiclass VPseudoUSLoad {
   }
 }
 
+multiclass VPseudoLoadMask {
+  foreach mti = AllMasks in {
+    let VLMul = mti.LMul.value in {
+      def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR>;
+    }
+  }
+}
+
 multiclass VPseudoSLoad {
   foreach lmul = MxList.m in {
     defvar LInfo = lmul.MX;
@@ -1254,6 +1262,14 @@ multiclass VPseudoUSStore {
   }
 }
 
+multiclass VPseudoStoreMask {
+  foreach mti = AllMasks in {
+    let VLMul = mti.LMul.value in {
+      def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR>;
+    }
+  }
+}
+
 multiclass VPseudoSStore {
   foreach lmul = MxList.m in {
     defvar LInfo = lmul.MX;
@@ -3115,7 +3131,7 @@ def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins),
 // Pseudos.
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
 def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>;
-
+def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -3132,6 +3148,9 @@ foreach eew = EEWList in {
   defm PseudoVSE # eew : VPseudoUSStore;
 }
 
+defm PseudoVLE1 : VPseudoLoadMask;
+defm PseudoVSE1 : VPseudoStoreMask;
+
 //===----------------------------------------------------------------------===//
 // 7.5 Vector Strided Instructions
 //===----------------------------------------------------------------------===//
@@ -3427,12 +3446,12 @@ defm PseudoVFSQRT      : VPseudoUnaryV_V;
 //===----------------------------------------------------------------------===//
 // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFRSQRTE7   : VPseudoUnaryV_V;
+defm PseudoVFRSQRT7    : VPseudoUnaryV_V;
 
 //===----------------------------------------------------------------------===//
 // 14.10. Vector Floating-Point Reciprocal Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFRECE7     : VPseudoUnaryV_V;
+defm PseudoVFREC7      : VPseudoUnaryV_V;
 
 //===----------------------------------------------------------------------===//
 // 14.11. Vector Floating-Point Min/Max Instructions
@@ -3709,6 +3728,15 @@ foreach vti = AllVectors in
                      vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
 }
 
+foreach vti = AllMasks in {
+  defvar PseudoVLE1 = !cast<Instruction>("PseudoVLE1_V_"#vti.BX);
+  def : Pat<(vti.Mask (int_riscv_vle1 GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+            (PseudoVLE1 $rs1, GPR:$vl, vti.SEW)>;
+  defvar PseudoVSE1 = !cast<Instruction>("PseudoVSE1_V_"#vti.BX);
+  def : Pat<(int_riscv_vse1 (vti.Mask VR:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+            (PseudoVSE1 $rs3, $rs1, GPR:$vl, vti.SEW)>;
+}
+
 //===----------------------------------------------------------------------===//
 // 7.5 Vector Strided Instructions
 //===----------------------------------------------------------------------===//
@@ -4100,12 +4128,12 @@ defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>;
 //===----------------------------------------------------------------------===//
 // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm "" : VPatUnaryV_V<"int_riscv_vfrsqrte7", "PseudoVFRSQRTE7", AllFloatVectors>;
+defm "" : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>;
 
 //===----------------------------------------------------------------------===//
 // 14.10. Vector Floating-Point Reciprocal Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm "" : VPatUnaryV_V<"int_riscv_vfrece7", "PseudoVFRECE7", AllFloatVectors>;
+defm "" : VPatUnaryV_V<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>;
 
 //===----------------------------------------------------------------------===//
 // 14.11. Vector Floating-Point Min/Max Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir
new file mode 100644
index 000000000000..ed8bc5698062
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
+
+# Make sure the cleanup pass only combines adjacent VSET{I}VLIs that have the
+# same opcode. The AVL operands cannot be compared when the opcode of the
+# previous instruction differs from that of the current one.
+
+--- |
+  ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll'
+  source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll"
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+  target triple = "riscv64"
+
+  define void @cleanup_vsetivli() #0 {
+    ret void
+  }
+
+  attributes #0 = { "target-features"="+experimental-v" }
+
+...
+---
+name:            cleanup_vsetivli
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: cleanup_vsetivli
+    ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: PseudoRET
+    dead %0:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    dead %1:gpr  = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
+    dead %2:gpr  = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
+    dead %3:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    dead %4:gpr  = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    dead %5:gpr  = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype
+    PseudoRET
+
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
index d724d0df9692..5e97df06470c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -16,8 +16,7 @@ define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
 define void @test_vsetvli_e8mf2_zero_avl() nounwind {
 ; CHECK-LABEL: test_vsetvli_e8mf2_zero_avl:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:    vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT:    vsetivli a0, 0, e8,mf2,ta,mu
 ; CHECK-NEXT:    ret
   call i32 @llvm.riscv.vsetvli.i32(i32 0, i32 0, i32 7)
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
index c28058c16efb..78d1008ce28b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -25,8 +25,7 @@ define void @test_vsetvli_e16mf4(i64 %avl) nounwind {
 define void @test_vsetvli_e32mf8_zero_avl() nounwind {
 ; CHECK-LABEL: test_vsetvli_e32mf8_zero_avl:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    mv a0, zero
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vsetivli a0, 0, e16,mf4,ta,mu
 ; CHECK-NEXT:    ret
   call i64 @llvm.riscv.vsetvli.i64(i64 0, i64 1, i64 6)
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
new file mode 100644
index 000000000000..244903fadb32
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
@@ -0,0 +1,602 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
+  <vscale x 1 x half>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vfrec7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
+    <vscale x 1 x half> %0,
+    i32 %1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfrec7.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrec7.mask.nxv1f16(
+    <vscale x 1 x half> %1,
+    <vscale x 1 x half> %2,
+    <vscale x 1 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
+  <vscale x 2 x half>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vfrec7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
+    <vscale x 2 x half> %0,
+    i32 %1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrec7.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrec7.mask.nxv2f16(
+    <vscale x 2 x half> %1,
+    <vscale x 2 x half> %2,
+    <vscale x 2 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
+  <vscale x 4 x half>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vfrec7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
+    <vscale x 4 x half> %0,
+    i32 %1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrec7.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrec7.mask.nxv4f16(
+    <vscale x 4 x half> %1,
+    <vscale x 4 x half> %2,
+    <vscale x 4 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
+  <vscale x 8 x half>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vfrec7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
+    <vscale x 8 x half> %0,
+    i32 %1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrec7.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrec7.mask.nxv8f16(
+    <vscale x 8 x half> %1,
+    <vscale x 8 x half> %2,
+    <vscale x 8 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
+  <vscale x 16 x half>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vfrec7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
+    <vscale x 16 x half> %0,
+    i32 %1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrec7.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrec7.mask.nxv16f16(
+    <vscale x 16 x half> %1,
+    <vscale x 16 x half> %2,
+    <vscale x 16 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
+  <vscale x 32 x half>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vfrec7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
+    <vscale x 32 x half> %0,
+    i32 %1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrec7.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrec7.mask.nxv32f16(
+    <vscale x 32 x half> %1,
+    <vscale x 32 x half> %2,
+    <vscale x 32 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
+  <vscale x 1 x float>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vfrec7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
+    <vscale x 1 x float> %0,
+    i32 %1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrec7.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrec7.mask.nxv1f32(
+    <vscale x 1 x float> %1,
+    <vscale x 1 x float> %2,
+    <vscale x 1 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
+  <vscale x 2 x float>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfrec7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
+    <vscale x 2 x float> %0,
+    i32 %1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrec7.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.mask.nxv2f32(
+    <vscale x 2 x float> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 2 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
+  <vscale x 4 x float>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vfrec7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
+    <vscale x 4 x float> %0,
+    i32 %1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrec7.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrec7.mask.nxv4f32(
+    <vscale x 4 x float> %1,
+    <vscale x 4 x float> %2,
+    <vscale x 4 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
+  <vscale x 8 x float>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vfrec7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
+    <vscale x 8 x float> %0,
+    i32 %1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrec7.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrec7.mask.nxv8f32(
+    <vscale x 8 x float> %1,
+    <vscale x 8 x float> %2,
+    <vscale x 8 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
+  <vscale x 16 x float>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vfrec7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
+    <vscale x 16 x float> %0,
+    i32 %1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrec7.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrec7.mask.nxv16f32(
+    <vscale x 16 x float> %1,
+    <vscale x 16 x float> %2,
+    <vscale x 16 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
+  <vscale x 1 x double>,
+  i32);
+
+define <vscale x 1 x double> @intrinsic_vfrec7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
+    <vscale x 1 x double> %0,
+    i32 %1)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrec7.mask.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x double> @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrec7.mask.nxv1f64(
+    <vscale x 1 x double> %1,
+    <vscale x 1 x double> %2,
+    <vscale x 1 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
+  <vscale x 2 x double>,
+  i32);
+
+define <vscale x 2 x double> @intrinsic_vfrec7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
+    <vscale x 2 x double> %0,
+    i32 %1)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrec7.mask.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x double> @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrec7.mask.nxv2f64(
+    <vscale x 2 x double> %1,
+    <vscale x 2 x double> %2,
+    <vscale x 2 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
+  <vscale x 4 x double>,
+  i32);
+
+define <vscale x 4 x double> @intrinsic_vfrec7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
+    <vscale x 4 x double> %0,
+    i32 %1)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrec7.mask.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x double> @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrec7.mask.nxv4f64(
+    <vscale x 4 x double> %1,
+    <vscale x 4 x double> %2,
+    <vscale x 4 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
+  <vscale x 8 x double>,
+  i32);
+
+define <vscale x 8 x double> @intrinsic_vfrec7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
+    <vscale x 8 x double> %0,
+    i32 %1)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrec7.mask.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x double> @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrec7.mask.nxv8f64(
+    <vscale x 8 x double> %1,
+    <vscale x 8 x double> %2,
+    <vscale x 8 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
new file mode 100644
index 000000000000..7b24fb9b0238
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
@@ -0,0 +1,602 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
+  <vscale x 1 x half>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vfrec7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
+    <vscale x 1 x half> %0,
+    i64 %1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfrec7.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrec7.mask.nxv1f16(
+    <vscale x 1 x half> %1,
+    <vscale x 1 x half> %2,
+    <vscale x 1 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
+  <vscale x 2 x half>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vfrec7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
+    <vscale x 2 x half> %0,
+    i64 %1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrec7.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrec7.mask.nxv2f16(
+    <vscale x 2 x half> %1,
+    <vscale x 2 x half> %2,
+    <vscale x 2 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
+  <vscale x 4 x half>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vfrec7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
+    <vscale x 4 x half> %0,
+    i64 %1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrec7.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrec7.mask.nxv4f16(
+    <vscale x 4 x half> %1,
+    <vscale x 4 x half> %2,
+    <vscale x 4 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
+  <vscale x 8 x half>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vfrec7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
+    <vscale x 8 x half> %0,
+    i64 %1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrec7.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrec7.mask.nxv8f16(
+    <vscale x 8 x half> %1,
+    <vscale x 8 x half> %2,
+    <vscale x 8 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
+  <vscale x 16 x half>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vfrec7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
+    <vscale x 16 x half> %0,
+    i64 %1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrec7.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrec7.mask.nxv16f16(
+    <vscale x 16 x half> %1,
+    <vscale x 16 x half> %2,
+    <vscale x 16 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
+  <vscale x 32 x half>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vfrec7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
+    <vscale x 32 x half> %0,
+    i64 %1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrec7.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrec7.mask.nxv32f16(
+    <vscale x 32 x half> %1,
+    <vscale x 32 x half> %2,
+    <vscale x 32 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
+  <vscale x 1 x float>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vfrec7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
+    <vscale x 1 x float> %0,
+    i64 %1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrec7.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrec7.mask.nxv1f32(
+    <vscale x 1 x float> %1,
+    <vscale x 1 x float> %2,
+    <vscale x 1 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
+  <vscale x 2 x float>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfrec7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
+    <vscale x 2 x float> %0,
+    i64 %1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrec7.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.mask.nxv2f32(
+    <vscale x 2 x float> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 2 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
+  <vscale x 4 x float>,
+  i64);
+
+define <vscale x 4 x float> @intrinsic_vfrec7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
+    <vscale x 4 x float> %0,
+    i64 %1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrec7.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x float> @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrec7.mask.nxv4f32(
+    <vscale x 4 x float> %1,
+    <vscale x 4 x float> %2,
+    <vscale x 4 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
+  <vscale x 8 x float>,
+  i64);
+
+define <vscale x 8 x float> @intrinsic_vfrec7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
+    <vscale x 8 x float> %0,
+    i64 %1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrec7.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x float> @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrec7.mask.nxv8f32(
+    <vscale x 8 x float> %1,
+    <vscale x 8 x float> %2,
+    <vscale x 8 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
+  <vscale x 16 x float>,
+  i64);
+
+define <vscale x 16 x float> @intrinsic_vfrec7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
+    <vscale x 16 x float> %0,
+    i64 %1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrec7.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x float> @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrec7.mask.nxv16f32(
+    <vscale x 16 x float> %1,
+    <vscale x 16 x float> %2,
+    <vscale x 16 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
+  <vscale x 1 x double>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfrec7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
+    <vscale x 1 x double> %0,
+    i64 %1)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrec7.mask.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrec7.mask.nxv1f64(
+    <vscale x 1 x double> %1,
+    <vscale x 1 x double> %2,
+    <vscale x 1 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
+  <vscale x 2 x double>,
+  i64);
+
+define <vscale x 2 x double> @intrinsic_vfrec7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
+    <vscale x 2 x double> %0,
+    i64 %1)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrec7.mask.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x double> @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrec7.mask.nxv2f64(
+    <vscale x 2 x double> %1,
+    <vscale x 2 x double> %2,
+    <vscale x 2 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
+  <vscale x 4 x double>,
+  i64);
+
+define <vscale x 4 x double> @intrinsic_vfrec7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
+    <vscale x 4 x double> %0,
+    i64 %1)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrec7.mask.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x double> @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrec7.mask.nxv4f64(
+    <vscale x 4 x double> %1,
+    <vscale x 4 x double> %2,
+    <vscale x 4 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
+  <vscale x 8 x double>,
+  i64);
+
+define <vscale x 8 x double> @intrinsic_vfrec7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT:    vfrec7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
+    <vscale x 8 x double> %0,
+    i64 %1)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrec7.mask.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x double> @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
+; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrec7.mask.nxv8f64(
+    <vscale x 8 x double> %1,
+    <vscale x 8 x double> %2,
+    <vscale x 8 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
new file mode 100644
index 000000000000..2740ecf3acff
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
@@ -0,0 +1,602 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
+  <vscale x 1 x half>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
+    <vscale x 1 x half> %0,
+    i32 %1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x half> @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrt7.mask.nxv1f16(
+    <vscale x 1 x half> %1,
+    <vscale x 1 x half> %2,
+    <vscale x 1 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
+  <vscale x 2 x half>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
+    <vscale x 2 x half> %0,
+    i32 %1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x half> @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrt7.mask.nxv2f16(
+    <vscale x 2 x half> %1,
+    <vscale x 2 x half> %2,
+    <vscale x 2 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
+  <vscale x 4 x half>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
+    <vscale x 4 x half> %0,
+    i32 %1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x half> @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrt7.mask.nxv4f16(
+    <vscale x 4 x half> %1,
+    <vscale x 4 x half> %2,
+    <vscale x 4 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
+  <vscale x 8 x half>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
+    <vscale x 8 x half> %0,
+    i32 %1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x half> @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrt7.mask.nxv8f16(
+    <vscale x 8 x half> %1,
+    <vscale x 8 x half> %2,
+    <vscale x 8 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
+  <vscale x 16 x half>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
+    <vscale x 16 x half> %0,
+    i32 %1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x half> @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrt7.mask.nxv16f16(
+    <vscale x 16 x half> %1,
+    <vscale x 16 x half> %2,
+    <vscale x 16 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
+  <vscale x 32 x half>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
+    <vscale x 32 x half> %0,
+    i32 %1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x half> @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrt7.mask.nxv32f16(
+    <vscale x 32 x half> %1,
+    <vscale x 32 x half> %2,
+    <vscale x 32 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
+  <vscale x 1 x float>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
+    <vscale x 1 x float> %0,
+    i32 %1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x float> @intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrt7.mask.nxv1f32(
+    <vscale x 1 x float> %1,
+    <vscale x 1 x float> %2,
+    <vscale x 1 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
+  <vscale x 2 x float>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
+    <vscale x 2 x float> %0,
+    i32 %1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x float> @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrt7.mask.nxv2f32(
+    <vscale x 2 x float> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 2 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
+  <vscale x 4 x float>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
+    <vscale x 4 x float> %0,
+    i32 %1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x float> @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.mask.nxv4f32(
+    <vscale x 4 x float> %1,
+    <vscale x 4 x float> %2,
+    <vscale x 4 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
+  <vscale x 8 x float>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
+    <vscale x 8 x float> %0,
+    i32 %1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x float> @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrt7.mask.nxv8f32(
+    <vscale x 8 x float> %1,
+    <vscale x 8 x float> %2,
+    <vscale x 8 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
+  <vscale x 16 x float>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
+    <vscale x 16 x float> %0,
+    i32 %1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x float> @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrt7.mask.nxv16f32(
+    <vscale x 16 x float> %1,
+    <vscale x 16 x float> %2,
+    <vscale x 16 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
+  <vscale x 1 x double>,
+  i32);
+
+define <vscale x 1 x double> @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
+    <vscale x 1 x double> %0,
+    i32 %1)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.mask.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x double> @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrt7.mask.nxv1f64(
+    <vscale x 1 x double> %1,
+    <vscale x 1 x double> %2,
+    <vscale x 1 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
+  <vscale x 2 x double>,
+  i32);
+
+define <vscale x 2 x double> @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
+    <vscale x 2 x double> %0,
+    i32 %1)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.mask.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x double> @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrt7.mask.nxv2f64(
+    <vscale x 2 x double> %1,
+    <vscale x 2 x double> %2,
+    <vscale x 2 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
+  <vscale x 4 x double>,
+  i32);
+
+define <vscale x 4 x double> @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
+    <vscale x 4 x double> %0,
+    i32 %1)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.mask.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x double> @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrt7.mask.nxv4f64(
+    <vscale x 4 x double> %1,
+    <vscale x 4 x double> %2,
+    <vscale x 4 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
+  <vscale x 8 x double>,
+  i32);
+
+define <vscale x 8 x double> @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
+    <vscale x 8 x double> %0,
+    i32 %1)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.mask.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x double> @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrt7.mask.nxv8f64(
+    <vscale x 8 x double> %1,
+    <vscale x 8 x double> %2,
+    <vscale x 8 x i1> %0,
+    i32 %3)
+
+  ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
new file mode 100644
index 000000000000..3ea0f1d9eb6a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
@@ -0,0 +1,602 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
+  <vscale x 1 x half>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
+    <vscale x 1 x half> %0,
+    i64 %1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.mask.nxv1f16(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x half> @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrt7.mask.nxv1f16(
+    <vscale x 1 x half> %1,
+    <vscale x 1 x half> %2,
+    <vscale x 1 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
+  <vscale x 2 x half>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
+    <vscale x 2 x half> %0,
+    i64 %1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.mask.nxv2f16(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x half> @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrt7.mask.nxv2f16(
+    <vscale x 2 x half> %1,
+    <vscale x 2 x half> %2,
+    <vscale x 2 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
+  <vscale x 4 x half>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
+    <vscale x 4 x half> %0,
+    i64 %1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.mask.nxv4f16(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x half> @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrt7.mask.nxv4f16(
+    <vscale x 4 x half> %1,
+    <vscale x 4 x half> %2,
+    <vscale x 4 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
+  <vscale x 8 x half>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
+    <vscale x 8 x half> %0,
+    i64 %1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.mask.nxv8f16(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x half> @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrt7.mask.nxv8f16(
+    <vscale x 8 x half> %1,
+    <vscale x 8 x half> %2,
+    <vscale x 8 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
+  <vscale x 16 x half>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
+    <vscale x 16 x half> %0,
+    i64 %1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.mask.nxv16f16(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x half> @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrt7.mask.nxv16f16(
+    <vscale x 16 x half> %1,
+    <vscale x 16 x half> %2,
+    <vscale x 16 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
+  <vscale x 32 x half>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
+    <vscale x 32 x half> %0,
+    i64 %1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.mask.nxv32f16(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  <vscale x 32 x i1>,
+  i64);
+
+define <vscale x 32 x half> @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrt7.mask.nxv32f16(
+    <vscale x 32 x half> %1,
+    <vscale x 32 x half> %2,
+    <vscale x 32 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
+  <vscale x 1 x float>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
+    <vscale x 1 x float> %0,
+    i64 %1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.mask.nxv1f32(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x float> @intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrt7.mask.nxv1f32(
+    <vscale x 1 x float> %1,
+    <vscale x 1 x float> %2,
+    <vscale x 1 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
+  <vscale x 2 x float>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
+    <vscale x 2 x float> %0,
+    i64 %1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.mask.nxv2f32(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x float> @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrt7.mask.nxv2f32(
+    <vscale x 2 x float> %1,
+    <vscale x 2 x float> %2,
+    <vscale x 2 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
+  <vscale x 4 x float>,
+  i64);
+
+define <vscale x 4 x float> @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
+    <vscale x 4 x float> %0,
+    i64 %1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.mask.nxv4f32(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x float> @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.mask.nxv4f32(
+    <vscale x 4 x float> %1,
+    <vscale x 4 x float> %2,
+    <vscale x 4 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
+  <vscale x 8 x float>,
+  i64);
+
+define <vscale x 8 x float> @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
+    <vscale x 8 x float> %0,
+    i64 %1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.mask.nxv8f32(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x float> @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrt7.mask.nxv8f32(
+    <vscale x 8 x float> %1,
+    <vscale x 8 x float> %2,
+    <vscale x 8 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
+  <vscale x 16 x float>,
+  i64);
+
+define <vscale x 16 x float> @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
+    <vscale x 16 x float> %0,
+    i64 %1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.mask.nxv16f32(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x i1>,
+  i64);
+
+define <vscale x 16 x float> @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrt7.mask.nxv16f32(
+    <vscale x 16 x float> %1,
+    <vscale x 16 x float> %2,
+    <vscale x 16 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
+  <vscale x 1 x double>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
+    <vscale x 1 x double> %0,
+    i64 %1)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.mask.nxv1f64(
+  <vscale x 1 x double>,
+  <vscale x 1 x double>,
+  <vscale x 1 x i1>,
+  i64);
+
+define <vscale x 1 x double> @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrt7.mask.nxv1f64(
+    <vscale x 1 x double> %1,
+    <vscale x 1 x double> %2,
+    <vscale x 1 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
+  <vscale x 2 x double>,
+  i64);
+
+define <vscale x 2 x double> @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
+    <vscale x 2 x double> %0,
+    i64 %1)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.mask.nxv2f64(
+  <vscale x 2 x double>,
+  <vscale x 2 x double>,
+  <vscale x 2 x i1>,
+  i64);
+
+define <vscale x 2 x double> @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrt7.mask.nxv2f64(
+    <vscale x 2 x double> %1,
+    <vscale x 2 x double> %2,
+    <vscale x 2 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
+  <vscale x 4 x double>,
+  i64);
+
+define <vscale x 4 x double> @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
+    <vscale x 4 x double> %0,
+    i64 %1)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.mask.nxv4f64(
+  <vscale x 4 x double>,
+  <vscale x 4 x double>,
+  <vscale x 4 x i1>,
+  i64);
+
+define <vscale x 4 x double> @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrt7.mask.nxv4f64(
+    <vscale x 4 x double> %1,
+    <vscale x 4 x double> %2,
+    <vscale x 4 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
+  <vscale x 8 x double>,
+  i64);
+
+define <vscale x 8 x double> @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v8
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
+    <vscale x 8 x double> %0,
+    i64 %1)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.mask.nxv8f64(
+  <vscale x 8 x double>,
+  <vscale x 8 x double>,
+  <vscale x 8 x i1>,
+  i64);
+
+define <vscale x 8 x double> @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
+; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrt7.mask.nxv8f64(
+    <vscale x 8 x double> %1,
+    <vscale x 8 x double> %2,
+    <vscale x 8 x i1> %0,
+    i64 %3)
+
+  ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle1-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vle1-rv32.ll
new file mode 100644
index 000000000000..f7040f7885d5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vle1-rv32.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vle1.nxv1i1(<vscale x 1 x i1>*, i32);
+
+define <vscale x 1 x i1> @intrinsic_vle1_v_nxv1i1(<vscale x 1 x i1>* %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i1> @llvm.riscv.vle1.nxv1i1(<vscale x 1 x i1>* %0, i32 %1)
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vle1.nxv2i1(<vscale x 2 x i1>*, i32);
+
+define <vscale x 2 x i1> @intrinsic_vle1_v_nxv2i1(<vscale x 2 x i1>* %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i1> @llvm.riscv.vle1.nxv2i1(<vscale x 2 x i1>* %0, i32 %1)
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vle1.nxv4i1(<vscale x 4 x i1>*, i32);
+
+define <vscale x 4 x i1> @intrinsic_vle1_v_nxv4i1(<vscale x 4 x i1>* %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i1> @llvm.riscv.vle1.nxv4i1(<vscale x 4 x i1>* %0, i32 %1)
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vle1.nxv8i1(<vscale x 8 x i1>*, i32);
+
+define <vscale x 8 x i1> @intrinsic_vle1_v_nxv8i1(<vscale x 8 x i1>* %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i1> @llvm.riscv.vle1.nxv8i1(<vscale x 8 x i1>* %0, i32 %1)
+  ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vle1.nxv16i1(<vscale x 16 x i1>*, i32);
+
+define <vscale x 16 x i1> @intrinsic_vle1_v_nxv16i1(<vscale x 16 x i1>* %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i1> @llvm.riscv.vle1.nxv16i1(<vscale x 16 x i1>* %0, i32 %1)
+  ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 32 x i1> @llvm.riscv.vle1.nxv32i1(<vscale x 32 x i1>*, i32);
+
+define <vscale x 32 x i1> @intrinsic_vle1_v_nxv32i1(<vscale x 32 x i1>* %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x i1> @llvm.riscv.vle1.nxv32i1(<vscale x 32 x i1>* %0, i32 %1)
+  ret <vscale x 32 x i1> %a
+}
+
+declare <vscale x 64 x i1> @llvm.riscv.vle1.nxv64i1(<vscale x 64 x i1>*, i32);
+
+define <vscale x 64 x i1> @intrinsic_vle1_v_nxv64i1(<vscale x 64 x i1>* %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 64 x i1> @llvm.riscv.vle1.nxv64i1(<vscale x 64 x i1>* %0, i32 %1)
+  ret <vscale x 64 x i1> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle1-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vle1-rv64.ll
new file mode 100644
index 000000000000..46c91f5f6b39
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vle1-rv64.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vle1.nxv1i1(<vscale x 1 x i1>*, i64);
+
+define <vscale x 1 x i1> @intrinsic_vle1_v_nxv1i1(<vscale x 1 x i1>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i1> @llvm.riscv.vle1.nxv1i1(<vscale x 1 x i1>* %0, i64 %1)
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vle1.nxv2i1(<vscale x 2 x i1>*, i64);
+
+define <vscale x 2 x i1> @intrinsic_vle1_v_nxv2i1(<vscale x 2 x i1>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i1> @llvm.riscv.vle1.nxv2i1(<vscale x 2 x i1>* %0, i64 %1)
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vle1.nxv4i1(<vscale x 4 x i1>*, i64);
+
+define <vscale x 4 x i1> @intrinsic_vle1_v_nxv4i1(<vscale x 4 x i1>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i1> @llvm.riscv.vle1.nxv4i1(<vscale x 4 x i1>* %0, i64 %1)
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vle1.nxv8i1(<vscale x 8 x i1>*, i64);
+
+define <vscale x 8 x i1> @intrinsic_vle1_v_nxv8i1(<vscale x 8 x i1>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i1> @llvm.riscv.vle1.nxv8i1(<vscale x 8 x i1>* %0, i64 %1)
+  ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vle1.nxv16i1(<vscale x 16 x i1>*, i64);
+
+define <vscale x 16 x i1> @intrinsic_vle1_v_nxv16i1(<vscale x 16 x i1>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 16 x i1> @llvm.riscv.vle1.nxv16i1(<vscale x 16 x i1>* %0, i64 %1)
+  ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 32 x i1> @llvm.riscv.vle1.nxv32i1(<vscale x 32 x i1>*, i64);
+
+define <vscale x 32 x i1> @intrinsic_vle1_v_nxv32i1(<vscale x 32 x i1>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 32 x i1> @llvm.riscv.vle1.nxv32i1(<vscale x 32 x i1>* %0, i64 %1)
+  ret <vscale x 32 x i1> %a
+}
+
+declare <vscale x 64 x i1> @llvm.riscv.vle1.nxv64i1(<vscale x 64 x i1>*, i64);
+
+define <vscale x 64 x i1> @intrinsic_vle1_v_nxv64i1(<vscale x 64 x i1>* %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vle1_v_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 64 x i1> @llvm.riscv.vle1.nxv64i1(<vscale x 64 x i1>* %0, i64 %1)
+  ret <vscale x 64 x i1> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vse1-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vse1-rv32.ll
new file mode 100644
index 000000000000..d94125d7e6ad
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vse1-rv32.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+
+declare void @llvm.riscv.vse1.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>*, i32);
+
+define void @intrinsic_vse1_v_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i32 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>*, i32);
+
+define void @intrinsic_vse1_v_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i32 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>*, i32);
+
+define void @intrinsic_vse1_v_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i32 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>*, i32);
+
+define void @intrinsic_vse1_v_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i32 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>*, i32);
+
+define void @intrinsic_vse1_v_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i32 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>*, i32);
+
+define void @intrinsic_vse1_v_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i32 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>*, i32);
+
+define void @intrinsic_vse1_v_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i32 %2)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vse1-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vse1-rv64.ll
new file mode 100644
index 000000000000..48d4585c01cd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vse1-rv64.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN:   --riscv-no-aliases < %s | FileCheck %s
+
+declare void @llvm.riscv.vse1.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>*, i64);
+
+define void @intrinsic_vse1_v_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i64 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>*, i64);
+
+define void @intrinsic_vse1_v_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i64 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>*, i64);
+
+define void @intrinsic_vse1_v_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i64 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>*, i64);
+
+define void @intrinsic_vse1_v_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i64 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>*, i64);
+
+define void @intrinsic_vse1_v_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i64 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>*, i64);
+
+define void @intrinsic_vse1_v_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i64 %2)
+  ret void
+}
+
+declare void @llvm.riscv.vse1.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>*, i64);
+
+define void @intrinsic_vse1_v_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vse1_v_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT:    vse1.v v0, (a0)
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  call void @llvm.riscv.vse1.nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i64 %2)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/vfrece7-rv32.ll b/llvm/test/CodeGen/RISCV/vfrece7-rv32.ll
deleted file mode 100644
index 7a810f10d47d..000000000000
--- a/llvm/test/CodeGen/RISCV/vfrece7-rv32.ll
+++ /dev/null
@@ -1,602 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
-; RUN:   --riscv-no-aliases < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrece7.nxv1f16(
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfrece7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrece7.nxv1f16(
-    <vscale x 1 x half> %0,
-    i32 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrece7.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfrece7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrece7.mask.nxv1f16(
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrece7.nxv2f16(
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfrece7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrece7.nxv2f16(
-    <vscale x 2 x half> %0,
-    i32 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrece7.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfrece7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrece7.mask.nxv2f16(
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrece7.nxv4f16(
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfrece7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrece7.nxv4f16(
-    <vscale x 4 x half> %0,
-    i32 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrece7.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfrece7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrece7.mask.nxv4f16(
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrece7.nxv8f16(
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfrece7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrece7.nxv8f16(
-    <vscale x 8 x half> %0,
-    i32 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrece7.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfrece7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrece7.mask.nxv8f16(
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrece7.nxv16f16(
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfrece7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrece7.nxv16f16(
-    <vscale x 16 x half> %0,
-    i32 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrece7.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfrece7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrece7.mask.nxv16f16(
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrece7.nxv32f16(
-  <vscale x 32 x half>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfrece7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrece7.nxv32f16(
-    <vscale x 32 x half> %0,
-    i32 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrece7.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfrece7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrece7.mask.nxv32f16(
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrece7.nxv1f32(
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfrece7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrece7.nxv1f32(
-    <vscale x 1 x float> %0,
-    i32 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrece7.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfrece7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrece7.mask.nxv1f32(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrece7.nxv2f32(
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfrece7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrece7.nxv2f32(
-    <vscale x 2 x float> %0,
-    i32 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrece7.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfrece7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrece7.mask.nxv2f32(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrece7.nxv4f32(
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfrece7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrece7.nxv4f32(
-    <vscale x 4 x float> %0,
-    i32 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrece7.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfrece7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrece7.mask.nxv4f32(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrece7.nxv8f32(
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfrece7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrece7.nxv8f32(
-    <vscale x 8 x float> %0,
-    i32 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrece7.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfrece7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrece7.mask.nxv8f32(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrece7.nxv16f32(
-  <vscale x 16 x float>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfrece7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrece7.nxv16f32(
-    <vscale x 16 x float> %0,
-    i32 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrece7.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfrece7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrece7.mask.nxv16f32(
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrece7.nxv1f64(
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfrece7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrece7.nxv1f64(
-    <vscale x 1 x double> %0,
-    i32 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrece7.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfrece7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrece7.mask.nxv1f64(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrece7.nxv2f64(
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfrece7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrece7.nxv2f64(
-    <vscale x 2 x double> %0,
-    i32 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrece7.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfrece7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrece7.mask.nxv2f64(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrece7.nxv4f64(
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfrece7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrece7.nxv4f64(
-    <vscale x 4 x double> %0,
-    i32 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrece7.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfrece7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrece7.mask.nxv4f64(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrece7.nxv8f64(
-  <vscale x 8 x double>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfrece7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrece7.nxv8f64(
-    <vscale x 8 x double> %0,
-    i32 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrece7.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfrece7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrece7.mask.nxv8f64(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/vfrece7-rv64.ll b/llvm/test/CodeGen/RISCV/vfrece7-rv64.ll
deleted file mode 100644
index 3af3fe4078c5..000000000000
--- a/llvm/test/CodeGen/RISCV/vfrece7-rv64.ll
+++ /dev/null
@@ -1,602 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
-; RUN:   --riscv-no-aliases < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrece7.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrece7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrece7.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrece7.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrece7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrece7.mask.nxv1f16(
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrece7.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrece7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrece7.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrece7.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrece7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrece7.mask.nxv2f16(
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrece7.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrece7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrece7.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrece7.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrece7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrece7.mask.nxv4f16(
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrece7.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrece7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrece7.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrece7.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrece7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrece7.mask.nxv8f16(
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrece7.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrece7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrece7.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrece7.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrece7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrece7.mask.nxv16f16(
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrece7.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrece7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrece7.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrece7.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrece7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrece7.mask.nxv32f16(
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrece7.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrece7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrece7.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrece7.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrece7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrece7.mask.nxv1f32(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrece7.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrece7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrece7.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrece7.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrece7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrece7.mask.nxv2f32(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrece7.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrece7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrece7.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrece7.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrece7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrece7.mask.nxv4f32(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrece7.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrece7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrece7.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrece7.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrece7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrece7.mask.nxv8f32(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrece7.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrece7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrece7.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrece7.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrece7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrece7.mask.nxv16f32(
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrece7.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrece7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrece7.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrece7.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrece7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrece7.mask.nxv1f64(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrece7.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrece7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrece7.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrece7.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrece7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrece7.mask.nxv2f64(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrece7.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrece7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrece7.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrece7.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrece7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrece7.mask.nxv4f64(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrece7.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrece7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
-; CHECK-NEXT:    vfrece7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrece7.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrece7.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrece7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrece7_mask_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
-; CHECK-NEXT:    vfrece7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrece7.mask.nxv8f64(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/vfrsqrte7-rv32.ll b/llvm/test/CodeGen/RISCV/vfrsqrte7-rv32.ll
deleted file mode 100644
index 083b411ef3c8..000000000000
--- a/llvm/test/CodeGen/RISCV/vfrsqrte7-rv32.ll
+++ /dev/null
@@ -1,602 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
-; RUN:   --riscv-no-aliases < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrsqrte7.nxv1f16(
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfrsqrte7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrte7.nxv1f16(
-    <vscale x 1 x half> %0,
-    i32 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrsqrte7.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfrsqrte7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrte7.mask.nxv1f16(
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsqrte7.nxv2f16(
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfrsqrte7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrte7.nxv2f16(
-    <vscale x 2 x half> %0,
-    i32 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsqrte7.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfrsqrte7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrte7.mask.nxv2f16(
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsqrte7.nxv4f16(
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfrsqrte7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrte7.nxv4f16(
-    <vscale x 4 x half> %0,
-    i32 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsqrte7.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfrsqrte7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrte7.mask.nxv4f16(
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsqrte7.nxv8f16(
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfrsqrte7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrte7.nxv8f16(
-    <vscale x 8 x half> %0,
-    i32 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsqrte7.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfrsqrte7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrte7.mask.nxv8f16(
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsqrte7.nxv16f16(
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfrsqrte7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrte7.nxv16f16(
-    <vscale x 16 x half> %0,
-    i32 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsqrte7.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfrsqrte7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrte7.mask.nxv16f16(
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsqrte7.nxv32f16(
-  <vscale x 32 x half>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfrsqrte7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrte7.nxv32f16(
-    <vscale x 32 x half> %0,
-    i32 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsqrte7.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfrsqrte7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrte7.mask.nxv32f16(
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsqrte7.nxv1f32(
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfrsqrte7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrte7.nxv1f32(
-    <vscale x 1 x float> %0,
-    i32 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsqrte7.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfrsqrte7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrte7.mask.nxv1f32(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsqrte7.nxv2f32(
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfrsqrte7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrte7.nxv2f32(
-    <vscale x 2 x float> %0,
-    i32 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsqrte7.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfrsqrte7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrte7.mask.nxv2f32(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsqrte7.nxv4f32(
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfrsqrte7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrte7.nxv4f32(
-    <vscale x 4 x float> %0,
-    i32 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsqrte7.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfrsqrte7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrte7.mask.nxv4f32(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsqrte7.nxv8f32(
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfrsqrte7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrte7.nxv8f32(
-    <vscale x 8 x float> %0,
-    i32 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsqrte7.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfrsqrte7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrte7.mask.nxv8f32(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsqrte7.nxv16f32(
-  <vscale x 16 x float>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfrsqrte7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrte7.nxv16f32(
-    <vscale x 16 x float> %0,
-    i32 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsqrte7.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfrsqrte7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrte7.mask.nxv16f32(
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsqrte7.nxv1f64(
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfrsqrte7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrte7.nxv1f64(
-    <vscale x 1 x double> %0,
-    i32 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsqrte7.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfrsqrte7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrte7.mask.nxv1f64(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsqrte7.nxv2f64(
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfrsqrte7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrte7.nxv2f64(
-    <vscale x 2 x double> %0,
-    i32 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsqrte7.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfrsqrte7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrte7.mask.nxv2f64(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsqrte7.nxv4f64(
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfrsqrte7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrte7.nxv4f64(
-    <vscale x 4 x double> %0,
-    i32 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsqrte7.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfrsqrte7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrte7.mask.nxv4f64(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsqrte7.nxv8f64(
-  <vscale x 8 x double>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfrsqrte7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrte7.nxv8f64(
-    <vscale x 8 x double> %0,
-    i32 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsqrte7.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfrsqrte7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrte7.mask.nxv8f64(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/vfrsqrte7-rv64.ll b/llvm/test/CodeGen/RISCV/vfrsqrte7-rv64.ll
deleted file mode 100644
index d0f4c9c4ac58..000000000000
--- a/llvm/test/CodeGen/RISCV/vfrsqrte7-rv64.ll
+++ /dev/null
@@ -1,602 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
-; RUN:   --riscv-no-aliases < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrsqrte7.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrsqrte7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrte7.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrsqrte7.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrsqrte7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrte7.mask.nxv1f16(
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsqrte7.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrsqrte7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrte7.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsqrte7.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrsqrte7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,mf2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrte7.mask.nxv2f16(
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsqrte7.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrsqrte7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrte7.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsqrte7.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrsqrte7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m1,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrte7.mask.nxv4f16(
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsqrte7.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrsqrte7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrte7.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsqrte7.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrsqrte7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrte7.mask.nxv8f16(
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsqrte7.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrsqrte7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrte7.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsqrte7.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrsqrte7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrte7.mask.nxv16f16(
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsqrte7.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrsqrte7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrte7.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsqrte7.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrsqrte7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e16,m8,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrte7.mask.nxv32f16(
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsqrte7.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrsqrte7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrte7.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsqrte7.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrsqrte7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrte7.mask.nxv1f32(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsqrte7.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrsqrte7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrte7.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsqrte7.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrsqrte7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m1,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrte7.mask.nxv2f32(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsqrte7.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrsqrte7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrte7.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsqrte7.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrsqrte7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrte7.mask.nxv4f32(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsqrte7.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrsqrte7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrte7.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsqrte7.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrsqrte7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrte7.mask.nxv8f32(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsqrte7.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrsqrte7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrte7.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsqrte7.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrsqrte7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e32,m8,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrte7.mask.nxv16f32(
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsqrte7.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrsqrte7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrte7.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsqrte7.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrsqrte7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m1,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v9, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrte7.mask.nxv1f64(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsqrte7.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrsqrte7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrte7.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsqrte7.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrsqrte7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m2,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v10, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrte7.mask.nxv2f64(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsqrte7.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrsqrte7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrte7.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsqrte7.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrsqrte7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m4,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v12, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrte7.mask.nxv4f64(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsqrte7.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrsqrte7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,ta,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v8
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrte7.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsqrte7.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrsqrte7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrte7_mask_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a0, a0, e64,m8,tu,mu
-; CHECK-NEXT:    vfrsqrte7.v v8, v16, v0.t
-; CHECK-NEXT:    jalr zero, 0(ra)
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrte7.mask.nxv8f64(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/MC/RISCV/rvv/fothers.s b/llvm/test/MC/RISCV/rvv/fothers.s
index b54d66016fc4..f894cbc5fc23 100644
--- a/llvm/test/MC/RISCV/rvv/fothers.s
+++ b/llvm/test/MC/RISCV/rvv/fothers.s
@@ -22,26 +22,26 @@ vfsqrt.v v8, v4
 # CHECK-ERROR: instruction requires the following: 'F'{{.*}}'V'
 # CHECK-UNKNOWN: 57 14 40 4e <unknown>
 
-vfrsqrte7.v v8, v4, v0.t
-# CHECK-INST: vfrsqrte7.v v8, v4, v0.t
+vfrsqrt7.v v8, v4, v0.t
+# CHECK-INST: vfrsqrt7.v v8, v4, v0.t
 # CHECK-ENCODING: [0x57,0x14,0x42,0x4c]
 # CHECK-ERROR: instruction requires the following: 'F'{{.*}}'V'
 # CHECK-UNKNOWN: 57 14 42 4c <unknown>
 
-vfrsqrte7.v v8, v4
-# CHECK-INST: vfrsqrte7.v v8, v4
+vfrsqrt7.v v8, v4
+# CHECK-INST: vfrsqrt7.v v8, v4
 # CHECK-ENCODING: [0x57,0x14,0x42,0x4e]
 # CHECK-ERROR: instruction requires the following: 'F'{{.*}}'V'
 # CHECK-UNKNOWN: 57 14 42 4e <unknown>
 
-vfrece7.v v8, v4, v0.t
-# CHECK-INST: vfrece7.v v8, v4, v0.t
+vfrec7.v v8, v4, v0.t
+# CHECK-INST: vfrec7.v v8, v4, v0.t
 # CHECK-ENCODING: [0x57,0x94,0x42,0x4c]
 # CHECK-ERROR: instruction requires the following: 'F'{{.*}}'V'
 # CHECK-UNKNOWN: 57 94 42 4c <unknown>
 
-vfrece7.v v8, v4
-# CHECK-INST: vfrece7.v v8, v4
+vfrec7.v v8, v4
+# CHECK-INST: vfrec7.v v8, v4
 # CHECK-ENCODING: [0x57,0x94,0x42,0x4e]
 # CHECK-ERROR: instruction requires the following: 'F'{{.*}}'V'
 # CHECK-UNKNOWN: 57 94 42 4e <unknown>
diff --git a/llvm/test/MC/RISCV/rvv/invalid.s b/llvm/test/MC/RISCV/rvv/invalid.s
index 9763dc07d12b..d298e2572636 100644
--- a/llvm/test/MC/RISCV/rvv/invalid.s
+++ b/llvm/test/MC/RISCV/rvv/invalid.s
@@ -1,6 +1,15 @@
 # RUN: not llvm-mc -triple=riscv64 --mattr=+experimental-v --mattr=+f %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
 
+vsetivli a2, 32, e8,m1
+# CHECK-ERROR: unknown operand
+
+vsetivli a2, zero, e8,m1
+# CHECK-ERROR: unknown operand
+
+vsetivli a2, 5, e31
+# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]
+
 vsetvli a2, a0, e31
 # CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]
 
diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s
index 4841f5757e76..3d0dbb15c36e 100644
--- a/llvm/test/MC/RISCV/rvv/load.s
+++ b/llvm/test/MC/RISCV/rvv/load.s
@@ -8,6 +8,12 @@
 # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
+vle1.v v8, (a0)
+# CHECK-INST: vle1.v v8, (a0)
+# CHECK-ENCODING: [0x07,0x04,0xb5,0x00]
+# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
+# CHECK-UNKNOWN: 07 04 b5 00 <unknown>
+
 vle8.v v8, (a0), v0.t
 # CHECK-INST: vle8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x00]
@@ -56,54 +62,6 @@ vle64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 07 74 05 02 <unknown>
 
-vle128.v v8, (a0), v0.t
-# CHECK-INST: vle128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 05 10 <unknown>
-
-vle128.v v8, (a0)
-# CHECK-INST: vle128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 05 12 <unknown>
-
-vle256.v v8, (a0), v0.t
-# CHECK-INST: vle256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 05 10 <unknown>
-
-vle256.v v8, (a0)
-# CHECK-INST: vle256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 05 12 <unknown>
-
-vle512.v v8, (a0), v0.t
-# CHECK-INST: vle512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 05 10 <unknown>
-
-vle512.v v8, (a0)
-# CHECK-INST: vle512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 05 12 <unknown>
-
-vle1024.v v8, (a0), v0.t
-# CHECK-INST: vle1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 05 10 <unknown>
-
-vle1024.v v8, (a0)
-# CHECK-INST: vle1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 05 12 <unknown>
-
 vle8ff.v v8, (a0), v0.t
 # CHECK-INST: vle8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x01]
@@ -152,54 +110,6 @@ vle64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 07 74 05 03 <unknown>
 
-vle128ff.v v8, (a0), v0.t
-# CHECK-INST: vle128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x11]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 05 11 <unknown>
-
-vle128ff.v v8, (a0)
-# CHECK-INST: vle128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x13]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 05 13 <unknown>
-
-vle256ff.v v8, (a0), v0.t
-# CHECK-INST: vle256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x11]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 05 11 <unknown>
-
-vle256ff.v v8, (a0)
-# CHECK-INST: vle256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x13]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 05 13 <unknown>
-
-vle512ff.v v8, (a0), v0.t
-# CHECK-INST: vle512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x11]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 05 11 <unknown>
-
-vle512ff.v v8, (a0)
-# CHECK-INST: vle512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x13]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 05 13 <unknown>
-
-vle1024ff.v v8, (a0), v0.t
-# CHECK-INST: vle1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x11]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 05 11 <unknown>
-
-vle1024ff.v v8, (a0)
-# CHECK-INST: vle1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x13]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 05 13 <unknown>
-
 vlse8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlse8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x08]
@@ -248,54 +158,6 @@ vlse64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 07 74 b5 0a <unknown>
 
-vlse128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlse128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 b5 18 <unknown>
-
-vlse128.v v8, (a0), a1
-# CHECK-INST: vlse128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 b5 1a <unknown>
-
-vlse256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlse256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 b5 18 <unknown>
-
-vlse256.v v8, (a0), a1
-# CHECK-INST: vlse256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 b5 1a <unknown>
-
-vlse512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlse512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 b5 18 <unknown>
-
-vlse512.v v8, (a0), a1
-# CHECK-INST: vlse512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 b5 1a <unknown>
-
-vlse1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlse1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 b5 18 <unknown>
-
-vlse1024.v v8, (a0), a1
-# CHECK-INST: vlse1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 b5 1a <unknown>
-
 vluxei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x04]
@@ -416,30 +278,6 @@ vl1re64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 07 74 85 22 <unknown>
 
-vl1re128.v v8, (a0)
-# CHECK-INST: vl1re128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x32]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 32 <unknown>
-
-vl1re256.v v8, (a0)
-# CHECK-INST: vl1re256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x32]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 32 <unknown>
-
-vl1re512.v v8, (a0)
-# CHECK-INST: vl1re512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x32]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 32 <unknown>
-
-vl1re1024.v v8, (a0)
-# CHECK-INST: vl1re1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x32]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 32 <unknown>
-
 vl2re8.v v8, (a0)
 # CHECK-INST: vl2re8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x85,0x42]
@@ -464,30 +302,6 @@ vl2re64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 07 74 85 42 <unknown>
 
-vl2re128.v v8, (a0)
-# CHECK-INST: vl2re128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x52]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 52 <unknown>
-
-vl2re256.v v8, (a0)
-# CHECK-INST: vl2re256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x52]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 52 <unknown>
-
-vl2re512.v v8, (a0)
-# CHECK-INST: vl2re512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x52]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 52 <unknown>
-
-vl2re1024.v v8, (a0)
-# CHECK-INST: vl2re1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x52]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 52 <unknown>
-
 vl4re8.v v8, (a0)
 # CHECK-INST: vl4re8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x85,0x82]
@@ -512,30 +326,6 @@ vl4re64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 07 74 85 82 <unknown>
 
-vl4re128.v v8, (a0)
-# CHECK-INST: vl4re128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x92]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 92 <unknown>
-
-vl4re256.v v8, (a0)
-# CHECK-INST: vl4re256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x92]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 92 <unknown>
-
-vl4re512.v v8, (a0)
-# CHECK-INST: vl4re512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x92]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 92 <unknown>
-
-vl4re1024.v v8, (a0)
-# CHECK-INST: vl4re1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x92]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 92 <unknown>
-
 vl8re8.v v8, (a0)
 # CHECK-INST: vl8re8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x85,0x02]
@@ -559,27 +349,3 @@ vl8re64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 07 74 85 02 <unknown>
-
-vl8re128.v v8, (a0)
-# CHECK-INST: vl8re128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 12 <unknown>
-
-vl8re256.v v8, (a0)
-# CHECK-INST: vl8re256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 12 <unknown>
-
-vl8re512.v v8, (a0)
-# CHECK-INST: vl8re512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 12 <unknown>
-
-vl8re1024.v v8, (a0)
-# CHECK-INST: vl8re1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 12 <unknown>
diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s
index 8437bf7f9030..e4795aa1c2c9 100644
--- a/llvm/test/MC/RISCV/rvv/store.s
+++ b/llvm/test/MC/RISCV/rvv/store.s
@@ -8,6 +8,12 @@
 # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
+vse1.v v24, (a0)
+# CHECK-INST: vse1.v v24, (a0)
+# CHECK-ENCODING: [0x27,0x0c,0xb5,0x00]
+# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
+# CHECK-UNKNOWN: 27 0c b5 00 <unknown>
+
 vse8.v v24, (a0), v0.t
 # CHECK-INST: vse8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x00]
@@ -56,54 +62,6 @@ vse64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 27 7c 05 02 <unknown>
 
-vse128.v v24, (a0), v0.t
-# CHECK-INST: vse128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 05 10 <unknown>
-
-vse128.v v24, (a0)
-# CHECK-INST: vse128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 05 12 <unknown>
-
-vse256.v v24, (a0), v0.t
-# CHECK-INST: vse256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 5c 05 10 <unknown>
-
-vse256.v v24, (a0)
-# CHECK-INST: vse256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 5c 05 12 <unknown>
-
-vse512.v v24, (a0), v0.t
-# CHECK-INST: vse512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 6c 05 10 <unknown>
-
-vse512.v v24, (a0)
-# CHECK-INST: vse512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 6c 05 12 <unknown>
-
-vse1024.v v24, (a0), v0.t
-# CHECK-INST: vse1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x10]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 7c 05 10 <unknown>
-
-vse1024.v v24, (a0)
-# CHECK-INST: vse1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x12]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 7c 05 12 <unknown>
-
 vsse8.v v24, (a0), a1, v0.t
 # CHECK-INST: vsse8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x08]
@@ -152,54 +110,6 @@ vsse64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 27 7c b5 0a <unknown>
 
-vsse128.v v24, (a0), a1, v0.t
-# CHECK-INST: vsse128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c b5 18 <unknown>
-
-vsse128.v v24, (a0), a1
-# CHECK-INST: vsse128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c b5 1a <unknown>
-
-vsse256.v v24, (a0), a1, v0.t
-# CHECK-INST: vsse256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 5c b5 18 <unknown>
-
-vsse256.v v24, (a0), a1
-# CHECK-INST: vsse256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 5c b5 1a <unknown>
-
-vsse512.v v24, (a0), a1, v0.t
-# CHECK-INST: vsse512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 6c b5 18 <unknown>
-
-vsse512.v v24, (a0), a1
-# CHECK-INST: vsse512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 6c b5 1a <unknown>
-
-vsse1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vsse1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x18]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 7c b5 18 <unknown>
-
-vsse1024.v v24, (a0), a1
-# CHECK-INST: vsse1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x1a]
-# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 7c b5 1a <unknown>
-
 vsuxei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x04]
diff --git a/llvm/test/MC/RISCV/rvv/vsetvl.s b/llvm/test/MC/RISCV/rvv/vsetvl.s
index c32126b0e24f..d792908627a6 100644
--- a/llvm/test/MC/RISCV/rvv/vsetvl.s
+++ b/llvm/test/MC/RISCV/rvv/vsetvl.s
@@ -79,3 +79,21 @@ vsetvl a2, a0, a1
 # CHECK-ENCODING: [0x57,0x76,0xb5,0x80]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
 # CHECK-UNKNOWN: 57 76 b5 80 <unknown>
+
+vsetivli a2, 0, e32,m1,ta,ma
+# CHECK-INST: vsetivli a2, 0, e32,m1,ta,ma
+# CHECK-ENCODING: [0x57,0x76,0x00,0xcd]
+# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
+# CHECK-UNKNOWN: 57 76 00 cd <unknown>
+
+vsetivli a2, 15, e32,m1,ta,ma
+# CHECK-INST: vsetivli a2, 15, e32,m1,ta,ma
+# CHECK-ENCODING: [0x57,0xf6,0x07,0xcd]
+# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
+# CHECK-UNKNOWN: 57 f6 07 cd <unknown>
+
+vsetivli a2, 31, e32,m1,ta,ma
+# CHECK-INST: vsetivli a2, 31, e32,m1,ta,ma
+# CHECK-ENCODING: [0x57,0xf6,0x0f,0xcd]
+# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
+# CHECK-UNKNOWN: 57 f6 0f cd <unknown>
diff --git a/llvm/test/MC/RISCV/rvv/zvlsseg.s b/llvm/test/MC/RISCV/rvv/zvlsseg.s
index b41b6984bcb2..98c7a59eb31e 100644
--- a/llvm/test/MC/RISCV/rvv/zvlsseg.s
+++ b/llvm/test/MC/RISCV/rvv/zvlsseg.s
@@ -60,54 +60,6 @@ vlseg2e64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 22 <unknown>
 
-vlseg2e128.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 30 <unknown>
-
-vlseg2e128.v v8, (a0)
-# CHECK-INST: vlseg2e128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 32 <unknown>
-
-vlseg2e256.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 30 <unknown>
-
-vlseg2e256.v v8, (a0)
-# CHECK-INST: vlseg2e256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 32 <unknown>
-
-vlseg2e512.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 30 <unknown>
-
-vlseg2e512.v v8, (a0)
-# CHECK-INST: vlseg2e512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 32 <unknown>
-
-vlseg2e1024.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 30 <unknown>
-
-vlseg2e1024.v v8, (a0)
-# CHECK-INST: vlseg2e1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 32 <unknown>
-
 vlseg2e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x21]
@@ -156,54 +108,6 @@ vlseg2e64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 23 <unknown>
 
-vlseg2e128ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x31]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 31 <unknown>
-
-vlseg2e128ff.v v8, (a0)
-# CHECK-INST: vlseg2e128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x33]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 33 <unknown>
-
-vlseg2e256ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x31]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 31 <unknown>
-
-vlseg2e256ff.v v8, (a0)
-# CHECK-INST: vlseg2e256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x33]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 33 <unknown>
-
-vlseg2e512ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x31]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 31 <unknown>
-
-vlseg2e512ff.v v8, (a0)
-# CHECK-INST: vlseg2e512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x33]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 33 <unknown>
-
-vlseg2e1024ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg2e1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x31]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 31 <unknown>
-
-vlseg2e1024ff.v v8, (a0)
-# CHECK-INST: vlseg2e1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x33]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 33 <unknown>
-
 vlsseg2e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x28]
@@ -252,54 +156,6 @@ vlsseg2e64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 b5 2a <unknown>
 
-vlsseg2e128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg2e128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 38 <unknown>
-
-vlsseg2e128.v v8, (a0), a1
-# CHECK-INST: vlsseg2e128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 3a <unknown>
-
-vlsseg2e256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg2e256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 38 <unknown>
-
-vlsseg2e256.v v8, (a0), a1
-# CHECK-INST: vlsseg2e256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 3a <unknown>
-
-vlsseg2e512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg2e512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 38 <unknown>
-
-vlsseg2e512.v v8, (a0), a1
-# CHECK-INST: vlsseg2e512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 3a <unknown>
-
-vlsseg2e1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg2e1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 38 <unknown>
-
-vlsseg2e1024.v v8, (a0), a1
-# CHECK-INST: vlsseg2e1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 3a <unknown>
-
 vluxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x24]
@@ -444,54 +300,6 @@ vlseg3e64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 42 <unknown>
 
-vlseg3e128.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 50 <unknown>
-
-vlseg3e128.v v8, (a0)
-# CHECK-INST: vlseg3e128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 52 <unknown>
-
-vlseg3e256.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 50 <unknown>
-
-vlseg3e256.v v8, (a0)
-# CHECK-INST: vlseg3e256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 52 <unknown>
-
-vlseg3e512.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 50 <unknown>
-
-vlseg3e512.v v8, (a0)
-# CHECK-INST: vlseg3e512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 52 <unknown>
-
-vlseg3e1024.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 50 <unknown>
-
-vlseg3e1024.v v8, (a0)
-# CHECK-INST: vlseg3e1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 52 <unknown>
-
 vlseg3e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x41]
@@ -540,54 +348,6 @@ vlseg3e64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 43 <unknown>
 
-vlseg3e128ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x51]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 51 <unknown>
-
-vlseg3e128ff.v v8, (a0)
-# CHECK-INST: vlseg3e128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x53]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 53 <unknown>
-
-vlseg3e256ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x51]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 51 <unknown>
-
-vlseg3e256ff.v v8, (a0)
-# CHECK-INST: vlseg3e256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x53]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 53 <unknown>
-
-vlseg3e512ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x51]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 51 <unknown>
-
-vlseg3e512ff.v v8, (a0)
-# CHECK-INST: vlseg3e512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x53]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 53 <unknown>
-
-vlseg3e1024ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg3e1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x51]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 51 <unknown>
-
-vlseg3e1024ff.v v8, (a0)
-# CHECK-INST: vlseg3e1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x53]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 53 <unknown>
-
 vlsseg3e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x48]
@@ -636,54 +396,6 @@ vlsseg3e64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 b5 4a <unknown>
 
-vlsseg3e128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg3e128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 58 <unknown>
-
-vlsseg3e128.v v8, (a0), a1
-# CHECK-INST: vlsseg3e128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 5a <unknown>
-
-vlsseg3e256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg3e256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 58 <unknown>
-
-vlsseg3e256.v v8, (a0), a1
-# CHECK-INST: vlsseg3e256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 5a <unknown>
-
-vlsseg3e512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg3e512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 58 <unknown>
-
-vlsseg3e512.v v8, (a0), a1
-# CHECK-INST: vlsseg3e512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 5a <unknown>
-
-vlsseg3e1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg3e1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 58 <unknown>
-
-vlsseg3e1024.v v8, (a0), a1
-# CHECK-INST: vlsseg3e1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 5a <unknown>
-
 vluxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x44]
@@ -828,54 +540,6 @@ vlseg4e64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 62 <unknown>
 
-vlseg4e128.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 70 <unknown>
-
-vlseg4e128.v v8, (a0)
-# CHECK-INST: vlseg4e128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 72 <unknown>
-
-vlseg4e256.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 70 <unknown>
-
-vlseg4e256.v v8, (a0)
-# CHECK-INST: vlseg4e256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 72 <unknown>
-
-vlseg4e512.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 70 <unknown>
-
-vlseg4e512.v v8, (a0)
-# CHECK-INST: vlseg4e512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 72 <unknown>
-
-vlseg4e1024.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 70 <unknown>
-
-vlseg4e1024.v v8, (a0)
-# CHECK-INST: vlseg4e1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 72 <unknown>
-
 vlseg4e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x61]
@@ -924,54 +588,6 @@ vlseg4e64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 63 <unknown>
 
-vlseg4e128ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x71]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 71 <unknown>
-
-vlseg4e128ff.v v8, (a0)
-# CHECK-INST: vlseg4e128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x73]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 73 <unknown>
-
-vlseg4e256ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x71]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 71 <unknown>
-
-vlseg4e256ff.v v8, (a0)
-# CHECK-INST: vlseg4e256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x73]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 73 <unknown>
-
-vlseg4e512ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x71]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 71 <unknown>
-
-vlseg4e512ff.v v8, (a0)
-# CHECK-INST: vlseg4e512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x73]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 73 <unknown>
-
-vlseg4e1024ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg4e1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x71]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 71 <unknown>
-
-vlseg4e1024ff.v v8, (a0)
-# CHECK-INST: vlseg4e1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x73]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 73 <unknown>
-
 vlsseg4e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x68]
@@ -1020,54 +636,6 @@ vlsseg4e64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 b5 6a <unknown>
 
-vlsseg4e128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg4e128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 78 <unknown>
-
-vlsseg4e128.v v8, (a0), a1
-# CHECK-INST: vlsseg4e128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 7a <unknown>
-
-vlsseg4e256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg4e256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 78 <unknown>
-
-vlsseg4e256.v v8, (a0), a1
-# CHECK-INST: vlsseg4e256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 7a <unknown>
-
-vlsseg4e512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg4e512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 78 <unknown>
-
-vlsseg4e512.v v8, (a0), a1
-# CHECK-INST: vlsseg4e512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 7a <unknown>
-
-vlsseg4e1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg4e1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 78 <unknown>
-
-vlsseg4e1024.v v8, (a0), a1
-# CHECK-INST: vlsseg4e1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 7a <unknown>
-
 vluxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x64]
@@ -1212,54 +780,6 @@ vlseg5e64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 82 <unknown>
 
-vlseg5e128.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 90 <unknown>
-
-vlseg5e128.v v8, (a0)
-# CHECK-INST: vlseg5e128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 92 <unknown>
-
-vlseg5e256.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 90 <unknown>
-
-vlseg5e256.v v8, (a0)
-# CHECK-INST: vlseg5e256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 92 <unknown>
-
-vlseg5e512.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 90 <unknown>
-
-vlseg5e512.v v8, (a0)
-# CHECK-INST: vlseg5e512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 92 <unknown>
-
-vlseg5e1024.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 90 <unknown>
-
-vlseg5e1024.v v8, (a0)
-# CHECK-INST: vlseg5e1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 92 <unknown>
-
 vlseg5e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x81]
@@ -1308,54 +828,6 @@ vlseg5e64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 83 <unknown>
 
-vlseg5e128ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0x91]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 91 <unknown>
-
-vlseg5e128ff.v v8, (a0)
-# CHECK-INST: vlseg5e128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0x93]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 93 <unknown>
-
-vlseg5e256ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0x91]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 91 <unknown>
-
-vlseg5e256ff.v v8, (a0)
-# CHECK-INST: vlseg5e256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0x93]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 93 <unknown>
-
-vlseg5e512ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0x91]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 91 <unknown>
-
-vlseg5e512ff.v v8, (a0)
-# CHECK-INST: vlseg5e512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0x93]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 93 <unknown>
-
-vlseg5e1024ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg5e1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0x91]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 91 <unknown>
-
-vlseg5e1024ff.v v8, (a0)
-# CHECK-INST: vlseg5e1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0x93]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 93 <unknown>
-
 vlsseg5e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x88]
@@ -1404,54 +876,6 @@ vlsseg5e64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 b5 8a <unknown>
 
-vlsseg5e128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg5e128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 98 <unknown>
-
-vlsseg5e128.v v8, (a0), a1
-# CHECK-INST: vlsseg5e128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 9a <unknown>
-
-vlsseg5e256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg5e256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 98 <unknown>
-
-vlsseg5e256.v v8, (a0), a1
-# CHECK-INST: vlsseg5e256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 9a <unknown>
-
-vlsseg5e512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg5e512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 98 <unknown>
-
-vlsseg5e512.v v8, (a0), a1
-# CHECK-INST: vlsseg5e512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 9a <unknown>
-
-vlsseg5e1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg5e1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 98 <unknown>
-
-vlsseg5e1024.v v8, (a0), a1
-# CHECK-INST: vlsseg5e1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 9a <unknown>
-
 vluxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x84]
@@ -1596,54 +1020,6 @@ vlseg6e64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 a2 <unknown>
 
-vlseg6e128.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 b0 <unknown>
-
-vlseg6e128.v v8, (a0)
-# CHECK-INST: vlseg6e128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 b2 <unknown>
-
-vlseg6e256.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 b0 <unknown>
-
-vlseg6e256.v v8, (a0)
-# CHECK-INST: vlseg6e256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 b2 <unknown>
-
-vlseg6e512.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 b0 <unknown>
-
-vlseg6e512.v v8, (a0)
-# CHECK-INST: vlseg6e512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 b2 <unknown>
-
-vlseg6e1024.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 b0 <unknown>
-
-vlseg6e1024.v v8, (a0)
-# CHECK-INST: vlseg6e1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 b2 <unknown>
-
 vlseg6e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa1]
@@ -1692,54 +1068,6 @@ vlseg6e64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 a3 <unknown>
 
-vlseg6e128ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0xb1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 b1 <unknown>
-
-vlseg6e128ff.v v8, (a0)
-# CHECK-INST: vlseg6e128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0xb3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 b3 <unknown>
-
-vlseg6e256ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0xb1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 b1 <unknown>
-
-vlseg6e256ff.v v8, (a0)
-# CHECK-INST: vlseg6e256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0xb3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 b3 <unknown>
-
-vlseg6e512ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0xb1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 b1 <unknown>
-
-vlseg6e512ff.v v8, (a0)
-# CHECK-INST: vlseg6e512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0xb3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 b3 <unknown>
-
-vlseg6e1024ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg6e1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0xb1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 b1 <unknown>
-
-vlseg6e1024ff.v v8, (a0)
-# CHECK-INST: vlseg6e1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0xb3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 b3 <unknown>
-
 vlsseg6e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xa8]
@@ -1788,54 +1116,6 @@ vlsseg6e64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 b5 aa <unknown>
 
-vlsseg6e128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg6e128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 b8 <unknown>
-
-vlsseg6e128.v v8, (a0), a1
-# CHECK-INST: vlsseg6e128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 ba <unknown>
-
-vlsseg6e256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg6e256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 b8 <unknown>
-
-vlsseg6e256.v v8, (a0), a1
-# CHECK-INST: vlsseg6e256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 ba <unknown>
-
-vlsseg6e512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg6e512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 b8 <unknown>
-
-vlsseg6e512.v v8, (a0), a1
-# CHECK-INST: vlsseg6e512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 ba <unknown>
-
-vlsseg6e1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg6e1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 b8 <unknown>
-
-vlsseg6e1024.v v8, (a0), a1
-# CHECK-INST: vlsseg6e1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 ba <unknown>
-
 vluxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xa4]
@@ -1980,54 +1260,6 @@ vlseg7e64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 c2 <unknown>
 
-vlseg7e128.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 d0 <unknown>
-
-vlseg7e128.v v8, (a0)
-# CHECK-INST: vlseg7e128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 d2 <unknown>
-
-vlseg7e256.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 d0 <unknown>
-
-vlseg7e256.v v8, (a0)
-# CHECK-INST: vlseg7e256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 d2 <unknown>
-
-vlseg7e512.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 d0 <unknown>
-
-vlseg7e512.v v8, (a0)
-# CHECK-INST: vlseg7e512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 d2 <unknown>
-
-vlseg7e1024.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 d0 <unknown>
-
-vlseg7e1024.v v8, (a0)
-# CHECK-INST: vlseg7e1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 d2 <unknown>
-
 vlseg7e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc1]
@@ -2076,54 +1308,6 @@ vlseg7e64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 c3 <unknown>
 
-vlseg7e128ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0xd1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 d1 <unknown>
-
-vlseg7e128ff.v v8, (a0)
-# CHECK-INST: vlseg7e128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0xd3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 d3 <unknown>
-
-vlseg7e256ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0xd1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 d1 <unknown>
-
-vlseg7e256ff.v v8, (a0)
-# CHECK-INST: vlseg7e256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0xd3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 d3 <unknown>
-
-vlseg7e512ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0xd1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 d1 <unknown>
-
-vlseg7e512ff.v v8, (a0)
-# CHECK-INST: vlseg7e512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0xd3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 d3 <unknown>
-
-vlseg7e1024ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg7e1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0xd1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 d1 <unknown>
-
-vlseg7e1024ff.v v8, (a0)
-# CHECK-INST: vlseg7e1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0xd3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 d3 <unknown>
-
 vlsseg7e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xc8]
@@ -2172,54 +1356,6 @@ vlsseg7e64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 b5 ca <unknown>
 
-vlsseg7e128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg7e128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 d8 <unknown>
-
-vlsseg7e128.v v8, (a0), a1
-# CHECK-INST: vlsseg7e128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 da <unknown>
-
-vlsseg7e256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg7e256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 d8 <unknown>
-
-vlsseg7e256.v v8, (a0), a1
-# CHECK-INST: vlsseg7e256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 da <unknown>
-
-vlsseg7e512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg7e512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 d8 <unknown>
-
-vlsseg7e512.v v8, (a0), a1
-# CHECK-INST: vlsseg7e512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 da <unknown>
-
-vlsseg7e1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg7e1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 d8 <unknown>
-
-vlsseg7e1024.v v8, (a0), a1
-# CHECK-INST: vlsseg7e1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 da <unknown>
-
 vluxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xc4]
@@ -2364,54 +1500,6 @@ vlseg8e64.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 e2 <unknown>
 
-vlseg8e128.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e128.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 f0 <unknown>
-
-vlseg8e128.v v8, (a0)
-# CHECK-INST: vlseg8e128.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 f2 <unknown>
-
-vlseg8e256.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e256.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 f0 <unknown>
-
-vlseg8e256.v v8, (a0)
-# CHECK-INST: vlseg8e256.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 f2 <unknown>
-
-vlseg8e512.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e512.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 f0 <unknown>
-
-vlseg8e512.v v8, (a0)
-# CHECK-INST: vlseg8e512.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 f2 <unknown>
-
-vlseg8e1024.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e1024.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 f0 <unknown>
-
-vlseg8e1024.v v8, (a0)
-# CHECK-INST: vlseg8e1024.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 f2 <unknown>
-
 vlseg8e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe1]
@@ -2460,54 +1548,6 @@ vlseg8e64ff.v v8, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 05 e3 <unknown>
 
-vlseg8e128ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e128ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x04,0x05,0xf1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 f1 <unknown>
-
-vlseg8e128ff.v v8, (a0)
-# CHECK-INST: vlseg8e128ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x05,0xf3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 05 f3 <unknown>
-
-vlseg8e256ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e256ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x54,0x05,0xf1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 f1 <unknown>
-
-vlseg8e256ff.v v8, (a0)
-# CHECK-INST: vlseg8e256ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x05,0xf3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 05 f3 <unknown>
-
-vlseg8e512ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e512ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x64,0x05,0xf1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 f1 <unknown>
-
-vlseg8e512ff.v v8, (a0)
-# CHECK-INST: vlseg8e512ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x05,0xf3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 05 f3 <unknown>
-
-vlseg8e1024ff.v v8, (a0), v0.t
-# CHECK-INST: vlseg8e1024ff.v v8, (a0), v0.t
-# CHECK-ENCODING: [0x07,0x74,0x05,0xf1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 f1 <unknown>
-
-vlseg8e1024ff.v v8, (a0)
-# CHECK-INST: vlseg8e1024ff.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x05,0xf3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 05 f3 <unknown>
-
 vlsseg8e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xe8]
@@ -2556,54 +1596,6 @@ vlsseg8e64.v v8, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 07 74 b5 ea <unknown>
 
-vlsseg8e128.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg8e128.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x04,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 f8 <unknown>
-
-vlsseg8e128.v v8, (a0), a1
-# CHECK-INST: vlsseg8e128.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x04,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 04 b5 fa <unknown>
-
-vlsseg8e256.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg8e256.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x54,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 f8 <unknown>
-
-vlsseg8e256.v v8, (a0), a1
-# CHECK-INST: vlsseg8e256.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x54,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 54 b5 fa <unknown>
-
-vlsseg8e512.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg8e512.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x64,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 f8 <unknown>
-
-vlsseg8e512.v v8, (a0), a1
-# CHECK-INST: vlsseg8e512.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x64,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 64 b5 fa <unknown>
-
-vlsseg8e1024.v v8, (a0), a1, v0.t
-# CHECK-INST: vlsseg8e1024.v v8, (a0), a1, v0.t
-# CHECK-ENCODING: [0x07,0x74,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 f8 <unknown>
-
-vlsseg8e1024.v v8, (a0), a1
-# CHECK-INST: vlsseg8e1024.v v8, (a0), a1
-# CHECK-ENCODING: [0x07,0x74,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 07 74 b5 fa <unknown>
-
 vluxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xe4]
@@ -2748,54 +1740,6 @@ vsseg2e64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c 05 22 <unknown>
 
-vsseg2e128.v v24, (a0), v0.t
-# CHECK-INST: vsseg2e128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 30 <unknown>
-
-vsseg2e128.v v24, (a0)
-# CHECK-INST: vsseg2e128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 32 <unknown>
-
-vsseg2e256.v v24, (a0), v0.t
-# CHECK-INST: vsseg2e256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 30 <unknown>
-
-vsseg2e256.v v24, (a0)
-# CHECK-INST: vsseg2e256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 32 <unknown>
-
-vsseg2e512.v v24, (a0), v0.t
-# CHECK-INST: vsseg2e512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 30 <unknown>
-
-vsseg2e512.v v24, (a0)
-# CHECK-INST: vsseg2e512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 32 <unknown>
-
-vsseg2e1024.v v24, (a0), v0.t
-# CHECK-INST: vsseg2e1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x30]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 30 <unknown>
-
-vsseg2e1024.v v24, (a0)
-# CHECK-INST: vsseg2e1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x32]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 32 <unknown>
-
 vssseg2e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x28]
@@ -2844,54 +1788,6 @@ vssseg2e64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c b5 2a <unknown>
 
-vssseg2e128.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg2e128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 38 <unknown>
-
-vssseg2e128.v v24, (a0), a1
-# CHECK-INST: vssseg2e128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 3a <unknown>
-
-vssseg2e256.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg2e256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 38 <unknown>
-
-vssseg2e256.v v24, (a0), a1
-# CHECK-INST: vssseg2e256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 3a <unknown>
-
-vssseg2e512.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg2e512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 38 <unknown>
-
-vssseg2e512.v v24, (a0), a1
-# CHECK-INST: vssseg2e512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 3a <unknown>
-
-vssseg2e1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg2e1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x38]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 38 <unknown>
-
-vssseg2e1024.v v24, (a0), a1
-# CHECK-INST: vssseg2e1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x3a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 3a <unknown>
-
 vsuxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x24]
@@ -3036,54 +1932,6 @@ vsseg3e64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c 05 42 <unknown>
 
-vsseg3e128.v v24, (a0), v0.t
-# CHECK-INST: vsseg3e128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 50 <unknown>
-
-vsseg3e128.v v24, (a0)
-# CHECK-INST: vsseg3e128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 52 <unknown>
-
-vsseg3e256.v v24, (a0), v0.t
-# CHECK-INST: vsseg3e256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 50 <unknown>
-
-vsseg3e256.v v24, (a0)
-# CHECK-INST: vsseg3e256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 52 <unknown>
-
-vsseg3e512.v v24, (a0), v0.t
-# CHECK-INST: vsseg3e512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 50 <unknown>
-
-vsseg3e512.v v24, (a0)
-# CHECK-INST: vsseg3e512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 52 <unknown>
-
-vsseg3e1024.v v24, (a0), v0.t
-# CHECK-INST: vsseg3e1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x50]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 50 <unknown>
-
-vsseg3e1024.v v24, (a0)
-# CHECK-INST: vsseg3e1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x52]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 52 <unknown>
-
 vssseg3e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x48]
@@ -3132,54 +1980,6 @@ vssseg3e64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c b5 4a <unknown>
 
-vssseg3e128.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg3e128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 58 <unknown>
-
-vssseg3e128.v v24, (a0), a1
-# CHECK-INST: vssseg3e128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 5a <unknown>
-
-vssseg3e256.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg3e256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 58 <unknown>
-
-vssseg3e256.v v24, (a0), a1
-# CHECK-INST: vssseg3e256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 5a <unknown>
-
-vssseg3e512.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg3e512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 58 <unknown>
-
-vssseg3e512.v v24, (a0), a1
-# CHECK-INST: vssseg3e512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 5a <unknown>
-
-vssseg3e1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg3e1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x58]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 58 <unknown>
-
-vssseg3e1024.v v24, (a0), a1
-# CHECK-INST: vssseg3e1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x5a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 5a <unknown>
-
 vsuxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x44]
@@ -3324,54 +2124,6 @@ vsseg4e64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c 05 62 <unknown>
 
-vsseg4e128.v v24, (a0), v0.t
-# CHECK-INST: vsseg4e128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 70 <unknown>
-
-vsseg4e128.v v24, (a0)
-# CHECK-INST: vsseg4e128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 72 <unknown>
-
-vsseg4e256.v v24, (a0), v0.t
-# CHECK-INST: vsseg4e256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 70 <unknown>
-
-vsseg4e256.v v24, (a0)
-# CHECK-INST: vsseg4e256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 72 <unknown>
-
-vsseg4e512.v v24, (a0), v0.t
-# CHECK-INST: vsseg4e512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 70 <unknown>
-
-vsseg4e512.v v24, (a0)
-# CHECK-INST: vsseg4e512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 72 <unknown>
-
-vsseg4e1024.v v24, (a0), v0.t
-# CHECK-INST: vsseg4e1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x70]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 70 <unknown>
-
-vsseg4e1024.v v24, (a0)
-# CHECK-INST: vsseg4e1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x72]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 72 <unknown>
-
 vssseg4e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x68]
@@ -3420,54 +2172,6 @@ vssseg4e64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c b5 6a <unknown>
 
-vssseg4e128.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg4e128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 78 <unknown>
-
-vssseg4e128.v v24, (a0), a1
-# CHECK-INST: vssseg4e128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 7a <unknown>
-
-vssseg4e256.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg4e256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 78 <unknown>
-
-vssseg4e256.v v24, (a0), a1
-# CHECK-INST: vssseg4e256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 7a <unknown>
-
-vssseg4e512.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg4e512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 78 <unknown>
-
-vssseg4e512.v v24, (a0), a1
-# CHECK-INST: vssseg4e512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 7a <unknown>
-
-vssseg4e1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg4e1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x78]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 78 <unknown>
-
-vssseg4e1024.v v24, (a0), a1
-# CHECK-INST: vssseg4e1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x7a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 7a <unknown>
-
 vsuxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x64]
@@ -3612,54 +2316,6 @@ vsseg5e64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c 05 82 <unknown>
 
-vsseg5e128.v v24, (a0), v0.t
-# CHECK-INST: vsseg5e128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 90 <unknown>
-
-vsseg5e128.v v24, (a0)
-# CHECK-INST: vsseg5e128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 92 <unknown>
-
-vsseg5e256.v v24, (a0), v0.t
-# CHECK-INST: vsseg5e256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 90 <unknown>
-
-vsseg5e256.v v24, (a0)
-# CHECK-INST: vsseg5e256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 92 <unknown>
-
-vsseg5e512.v v24, (a0), v0.t
-# CHECK-INST: vsseg5e512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 90 <unknown>
-
-vsseg5e512.v v24, (a0)
-# CHECK-INST: vsseg5e512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 92 <unknown>
-
-vsseg5e1024.v v24, (a0), v0.t
-# CHECK-INST: vsseg5e1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x90]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 90 <unknown>
-
-vsseg5e1024.v v24, (a0)
-# CHECK-INST: vsseg5e1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0x92]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 92 <unknown>
-
 vssseg5e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x88]
@@ -3708,54 +2364,6 @@ vssseg5e64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c b5 8a <unknown>
 
-vssseg5e128.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg5e128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 98 <unknown>
-
-vssseg5e128.v v24, (a0), a1
-# CHECK-INST: vssseg5e128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 9a <unknown>
-
-vssseg5e256.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg5e256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 98 <unknown>
-
-vssseg5e256.v v24, (a0), a1
-# CHECK-INST: vssseg5e256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 9a <unknown>
-
-vssseg5e512.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg5e512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 98 <unknown>
-
-vssseg5e512.v v24, (a0), a1
-# CHECK-INST: vssseg5e512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 9a <unknown>
-
-vssseg5e1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg5e1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x98]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 98 <unknown>
-
-vssseg5e1024.v v24, (a0), a1
-# CHECK-INST: vssseg5e1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0x9a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 9a <unknown>
-
 vsuxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x84]
@@ -3900,54 +2508,6 @@ vsseg6e64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c 05 a2 <unknown>
 
-vsseg6e128.v v24, (a0), v0.t
-# CHECK-INST: vsseg6e128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 b0 <unknown>
-
-vsseg6e128.v v24, (a0)
-# CHECK-INST: vsseg6e128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 b2 <unknown>
-
-vsseg6e256.v v24, (a0), v0.t
-# CHECK-INST: vsseg6e256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 b0 <unknown>
-
-vsseg6e256.v v24, (a0)
-# CHECK-INST: vsseg6e256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 b2 <unknown>
-
-vsseg6e512.v v24, (a0), v0.t
-# CHECK-INST: vsseg6e512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 b0 <unknown>
-
-vsseg6e512.v v24, (a0)
-# CHECK-INST: vsseg6e512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 b2 <unknown>
-
-vsseg6e1024.v v24, (a0), v0.t
-# CHECK-INST: vsseg6e1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0xb0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 b0 <unknown>
-
-vsseg6e1024.v v24, (a0)
-# CHECK-INST: vsseg6e1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0xb2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 b2 <unknown>
-
 vssseg6e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xa8]
@@ -3996,54 +2556,6 @@ vssseg6e64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c b5 aa <unknown>
 
-vssseg6e128.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg6e128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 b8 <unknown>
-
-vssseg6e128.v v24, (a0), a1
-# CHECK-INST: vssseg6e128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 ba <unknown>
-
-vssseg6e256.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg6e256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 b8 <unknown>
-
-vssseg6e256.v v24, (a0), a1
-# CHECK-INST: vssseg6e256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 ba <unknown>
-
-vssseg6e512.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg6e512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 b8 <unknown>
-
-vssseg6e512.v v24, (a0), a1
-# CHECK-INST: vssseg6e512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 ba <unknown>
-
-vssseg6e1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg6e1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0xb8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 b8 <unknown>
-
-vssseg6e1024.v v24, (a0), a1
-# CHECK-INST: vssseg6e1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0xba]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 ba <unknown>
-
 vsuxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xa4]
@@ -4188,54 +2700,6 @@ vsseg7e64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c 05 c2 <unknown>
 
-vsseg7e128.v v24, (a0), v0.t
-# CHECK-INST: vsseg7e128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 d0 <unknown>
-
-vsseg7e128.v v24, (a0)
-# CHECK-INST: vsseg7e128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 d2 <unknown>
-
-vsseg7e256.v v24, (a0), v0.t
-# CHECK-INST: vsseg7e256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 d0 <unknown>
-
-vsseg7e256.v v24, (a0)
-# CHECK-INST: vsseg7e256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 d2 <unknown>
-
-vsseg7e512.v v24, (a0), v0.t
-# CHECK-INST: vsseg7e512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 d0 <unknown>
-
-vsseg7e512.v v24, (a0)
-# CHECK-INST: vsseg7e512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 d2 <unknown>
-
-vsseg7e1024.v v24, (a0), v0.t
-# CHECK-INST: vsseg7e1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0xd0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 d0 <unknown>
-
-vsseg7e1024.v v24, (a0)
-# CHECK-INST: vsseg7e1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0xd2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 d2 <unknown>
-
 vssseg7e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xc8]
@@ -4284,54 +2748,6 @@ vssseg7e64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c b5 ca <unknown>
 
-vssseg7e128.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg7e128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 d8 <unknown>
-
-vssseg7e128.v v24, (a0), a1
-# CHECK-INST: vssseg7e128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 da <unknown>
-
-vssseg7e256.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg7e256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 d8 <unknown>
-
-vssseg7e256.v v24, (a0), a1
-# CHECK-INST: vssseg7e256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 da <unknown>
-
-vssseg7e512.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg7e512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 d8 <unknown>
-
-vssseg7e512.v v24, (a0), a1
-# CHECK-INST: vssseg7e512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 da <unknown>
-
-vssseg7e1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg7e1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0xd8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 d8 <unknown>
-
-vssseg7e1024.v v24, (a0), a1
-# CHECK-INST: vssseg7e1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0xda]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 da <unknown>
-
 vsuxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xc4]
@@ -4476,54 +2892,6 @@ vsseg8e64.v v24, (a0)
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c 05 e2 <unknown>
 
-vsseg8e128.v v24, (a0), v0.t
-# CHECK-INST: vsseg8e128.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x0c,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 f0 <unknown>
-
-vsseg8e128.v v24, (a0)
-# CHECK-INST: vsseg8e128.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c 05 f2 <unknown>
-
-vsseg8e256.v v24, (a0), v0.t
-# CHECK-INST: vsseg8e256.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x5c,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 f0 <unknown>
-
-vsseg8e256.v v24, (a0)
-# CHECK-INST: vsseg8e256.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x5c,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c 05 f2 <unknown>
-
-vsseg8e512.v v24, (a0), v0.t
-# CHECK-INST: vsseg8e512.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x6c,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 f0 <unknown>
-
-vsseg8e512.v v24, (a0)
-# CHECK-INST: vsseg8e512.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x6c,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c 05 f2 <unknown>
-
-vsseg8e1024.v v24, (a0), v0.t
-# CHECK-INST: vsseg8e1024.v v24, (a0), v0.t
-# CHECK-ENCODING: [0x27,0x7c,0x05,0xf0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 f0 <unknown>
-
-vsseg8e1024.v v24, (a0)
-# CHECK-INST: vsseg8e1024.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x7c,0x05,0xf2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c 05 f2 <unknown>
-
 vssseg8e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xe8]
@@ -4572,54 +2940,6 @@ vssseg8e64.v v24, (a0), a1
 # CHECK-ERROR: instruction requires the following: 'Zvlsseg'
 # CHECK-UNKNOWN: 27 7c b5 ea <unknown>
 
-vssseg8e128.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg8e128.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 f8 <unknown>
-
-vssseg8e128.v v24, (a0), a1
-# CHECK-INST: vssseg8e128.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x0c,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 0c b5 fa <unknown>
-
-vssseg8e256.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg8e256.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 f8 <unknown>
-
-vssseg8e256.v v24, (a0), a1
-# CHECK-INST: vssseg8e256.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x5c,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 5c b5 fa <unknown>
-
-vssseg8e512.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg8e512.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 f8 <unknown>
-
-vssseg8e512.v v24, (a0), a1
-# CHECK-INST: vssseg8e512.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x6c,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 6c b5 fa <unknown>
-
-vssseg8e1024.v v24, (a0), a1, v0.t
-# CHECK-INST: vssseg8e1024.v v24, (a0), a1, v0.t
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0xf8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 f8 <unknown>
-
-vssseg8e1024.v v24, (a0), a1
-# CHECK-INST: vssseg8e1024.v v24, (a0), a1
-# CHECK-ENCODING: [0x27,0x7c,0xb5,0xfa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
-# CHECK-UNKNOWN: 27 7c b5 fa <unknown>
-
 vsuxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xe4]

From e5c6c5c16923b2127a92e8a596ade50b97111b03 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 27 Jan 2021 20:34:35 -0800
Subject: [PATCH 089/244] IntrinsicEmitter: Change IntrinsicsToAttributesMap
 from uint8_t[] to uint16_t[]

We need at least 252 UniqAttributes now, so the uint8_t table entries will soon
overflow. With downstream backends we can easily use up the last few values,
so bump the element type to uint16_t.

(cherry picked from commit b7d63244226ba2c0df651622fe7fe3f5f8aba262)
---
 llvm/utils/TableGen/IntrinsicEmitter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 4be0d90a45d2..978d24c8300d 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -638,13 +638,13 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
       std::max(maxArgAttrs, unsigned(intrinsic.ArgumentAttributes.size()));
     unsigned &N = UniqAttributes[&intrinsic];
     if (N) continue;
-    assert(AttrNum < 256 && "Too many unique attributes for table!");
     N = ++AttrNum;
+    assert(N < 65536 && "Too many unique attributes for table!");
   }
 
   // Emit an array of AttributeList.  Most intrinsics will have at least one
   // entry, for the function itself (index ~1), which is usually nounwind.
-  OS << "  static const uint8_t IntrinsicsToAttributesMap[] = {\n";
+  OS << "  static const uint16_t IntrinsicsToAttributesMap[] = {\n";
 
   for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
     const CodeGenIntrinsic &intrinsic = Ints[i];

From fbb4aa08510ea87c8000389329b7d8cc9e348b5d Mon Sep 17 00:00:00 2001
From: Jeroen Dobbelaere <jeroen.dobbelaere@synopsys.com>
Date: Mon, 1 Feb 2021 09:23:33 +0100
Subject: [PATCH 090/244] [LoopPeel] Use llvm.experimental.noalias.scope.decl
 for duplicating noalias metadata as needed.

The reduction of a sanitizer build failure when enabling the dominance check (D95335) showed that loop peeling also needs to take care of scope duplication, just like loop unrolling (D92887).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D95544

(cherry picked from commit 80cdd30eb90c3509bf315f1fa1369483e2448bbd)
---
 llvm/lib/Transforms/Utils/LoopPeel.cpp        |  19 ++-
 .../peel-loop-noalias-scope-decl.ll           | 149 ++++++++++++++++++
 2 files changed, 166 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-noalias-scope-decl.ll

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index cb5fee7d28e6..befacb591762 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -509,7 +509,7 @@ static void cloneLoopBlocks(
     SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
     SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
     ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
-    LoopInfo *LI) {
+    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   BasicBlock *PreHeader = L->getLoopPreheader();
@@ -545,6 +545,15 @@ static void cloneLoopBlocks(
     }
   }
 
+  {
+    // Identify what other metadata depends on the cloned version. After
+    // cloning, replace the metadata with the corrected version for both
+    // memory instructions and noalias intrinsics.
+    std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
+    cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+                               Header->getContext(), Ext);
+  }
+
   // Recursively create the new Loop objects for nested loops, if any,
   // to preserve LoopInfo.
   for (Loop *ChildLoop : *L) {
@@ -769,13 +778,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   uint64_t ExitWeight = 0, FallThroughWeight = 0;
   initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
 
+  // Identify what noalias metadata is inside the loop: if it is inside the
+  // loop, the associated metadata must be cloned for each iteration.
+  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
   // For each peeled-off iteration, make a copy of the loop.
   for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
     SmallVector<BasicBlock *, 8> NewBlocks;
     ValueToValueMapTy VMap;
 
     cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
-                    LoopBlocks, VMap, LVMap, DT, LI);
+                    LoopBlocks, VMap, LVMap, DT, LI,
+                    LoopLocalNoAliasDeclScopes);
 
     // Remap to use values from the current iteration instead of the
     // previous one.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-noalias-scope-decl.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-noalias-scope-decl.ll
new file mode 100644
index 000000000000..3929c5d5bb57
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-noalias-scope-decl.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=1 | FileCheck %s
+; RUN: opt < %s -S -passes='loop-unroll<peeling;no-runtime>' -unroll-force-peel-count=1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Loop peeling must result in valid scope declartions
+
+define internal fastcc void @test01(i8* %p0, i8* %p1, i8* %p2) unnamed_addr align 2 {
+; CHECK-LABEL: @test01(
+; CHECK-NEXT:  for.body47.lr.ph:
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !0)
+; CHECK-NEXT:    br label [[FOR_BODY47_PEEL_BEGIN:%.*]]
+; CHECK:       for.body47.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY47_PEEL:%.*]]
+; CHECK:       for.body47.peel:
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !3)
+; CHECK-NEXT:    store i8 42, i8* [[P0:%.*]], align 1, !alias.scope !3
+; CHECK-NEXT:    store i8 43, i8* [[P1:%.*]], align 1, !alias.scope !0
+; CHECK-NEXT:    store i8 44, i8* [[P2:%.*]], align 1, !alias.scope !5
+; CHECK-NEXT:    store i8 42, i8* [[P0]], align 1, !noalias !3
+; CHECK-NEXT:    store i8 43, i8* [[P1]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 44, i8* [[P2]], align 1, !noalias !5
+; CHECK-NEXT:    [[CMP52_PEEL:%.*]] = icmp eq i32 0, 0
+; CHECK-NEXT:    br i1 [[CMP52_PEEL]], label [[COND_TRUE_PEEL:%.*]], label [[COND_END_PEEL:%.*]]
+; CHECK:       cond.true.peel:
+; CHECK-NEXT:    store i8 52, i8* [[P0]], align 1, !alias.scope !3
+; CHECK-NEXT:    store i8 53, i8* [[P1]], align 1, !alias.scope !0
+; CHECK-NEXT:    store i8 54, i8* [[P2]], align 1, !alias.scope !5
+; CHECK-NEXT:    store i8 52, i8* [[P0]], align 1, !noalias !3
+; CHECK-NEXT:    store i8 53, i8* [[P1]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 54, i8* [[P2]], align 1, !noalias !5
+; CHECK-NEXT:    br label [[COND_END_PEEL]]
+; CHECK:       cond.end.peel:
+; CHECK-NEXT:    store i8 62, i8* [[P0]], align 1, !alias.scope !3
+; CHECK-NEXT:    store i8 63, i8* [[P1]], align 1, !alias.scope !0
+; CHECK-NEXT:    store i8 64, i8* [[P2]], align 1, !alias.scope !5
+; CHECK-NEXT:    store i8 62, i8* [[P0]], align 1, !noalias !3
+; CHECK-NEXT:    store i8 63, i8* [[P1]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 64, i8* [[P2]], align 1, !noalias !5
+; CHECK-NEXT:    [[INC_PEEL:%.*]] = add nuw i32 0, 1
+; CHECK-NEXT:    [[EXITCOND_NOT_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], undef
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT_PEEL]], label [[FOR_COND_CLEANUP46:%.*]], label [[FOR_BODY47_PEEL_NEXT:%.*]]
+; CHECK:       for.body47.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY47_PEEL_NEXT1:%.*]]
+; CHECK:       for.body47.peel.next1:
+; CHECK-NEXT:    br label [[FOR_BODY47_LR_PH_PEEL_NEWPH:%.*]]
+; CHECK:       for.body47.lr.ph.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY47:%.*]]
+; CHECK:       for.cond.cleanup46.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP46]]
+; CHECK:       for.cond.cleanup46:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body47:
+; CHECK-NEXT:    [[J_02:%.*]] = phi i32 [ [[INC_PEEL]], [[FOR_BODY47_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !6)
+; CHECK-NEXT:    store i8 42, i8* [[P0]], align 1, !alias.scope !6
+; CHECK-NEXT:    store i8 43, i8* [[P1]], align 1, !alias.scope !0
+; CHECK-NEXT:    store i8 44, i8* [[P2]], align 1, !alias.scope !8
+; CHECK-NEXT:    store i8 42, i8* [[P0]], align 1, !noalias !6
+; CHECK-NEXT:    store i8 43, i8* [[P1]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 44, i8* [[P2]], align 1, !noalias !8
+; CHECK-NEXT:    br i1 false, label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    store i8 52, i8* [[P0]], align 1, !alias.scope !6
+; CHECK-NEXT:    store i8 53, i8* [[P1]], align 1, !alias.scope !0
+; CHECK-NEXT:    store i8 54, i8* [[P2]], align 1, !alias.scope !8
+; CHECK-NEXT:    store i8 52, i8* [[P0]], align 1, !noalias !6
+; CHECK-NEXT:    store i8 53, i8* [[P1]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 54, i8* [[P2]], align 1, !noalias !8
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    store i8 62, i8* [[P0]], align 1, !alias.scope !6
+; CHECK-NEXT:    store i8 63, i8* [[P1]], align 1, !alias.scope !0
+; CHECK-NEXT:    store i8 64, i8* [[P2]], align 1, !alias.scope !8
+; CHECK-NEXT:    store i8 62, i8* [[P0]], align 1, !noalias !6
+; CHECK-NEXT:    store i8 63, i8* [[P1]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 64, i8* [[P2]], align 1, !noalias !8
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_02]], 1
+; CHECK-NEXT:    br i1 undef, label [[FOR_COND_CLEANUP46_LOOPEXIT:%.*]], label [[FOR_BODY47]], [[LOOP9:!llvm.loop !.*]]
+;
+for.body47.lr.ph:
+  call void @llvm.experimental.noalias.scope.decl(metadata !5)
+  br label %for.body47
+
+for.cond.cleanup46:                               ; preds = %cond.end
+  ret void
+
+for.body47:                                       ; preds = %cond.end, %for.body47.lr.ph
+  %j.02 = phi i32 [ 0, %for.body47.lr.ph ], [ %inc, %cond.end ]
+  call void @llvm.experimental.noalias.scope.decl(metadata !0)
+  store i8 42, i8* %p0, !alias.scope !0
+  store i8 43, i8* %p1, !alias.scope !5
+  store i8 44, i8* %p2, !alias.scope !7
+  store i8 42, i8* %p0, !noalias !0
+  store i8 43, i8* %p1, !noalias !5
+  store i8 44, i8* %p2, !noalias !7
+  %cmp52 = icmp eq i32 %j.02, 0
+  br i1 %cmp52, label %cond.true, label %cond.end
+
+cond.true:                                        ; preds = %for.body47
+  store i8 52, i8* %p0, !alias.scope !0
+  store i8 53, i8* %p1, !alias.scope !5
+  store i8 54, i8* %p2, !alias.scope !7
+  store i8 52, i8* %p0, !noalias !0
+  store i8 53, i8* %p1, !noalias !5
+  store i8 54, i8* %p2, !noalias !7
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %for.body47
+  store i8 62, i8* %p0, !alias.scope !0
+  store i8 63, i8* %p1, !alias.scope !5
+  store i8 64, i8* %p2, !alias.scope !7
+  store i8 62, i8* %p0, !noalias !0
+  store i8 63, i8* %p1, !noalias !5
+  store i8 64, i8* %p2, !noalias !7
+  %inc = add nuw i32 %j.02, 1
+  %exitcond.not = icmp eq i32 %inc, undef
+  br i1 %exitcond.not, label %for.cond.cleanup46, label %for.body47, !llvm.loop !3
+}
+
+; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
+declare void @llvm.experimental.noalias.scope.decl(metadata) #0
+
+attributes #0 = { inaccessiblememonly nofree nosync nounwind willreturn }
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"foo: %inner.result"}
+!2 = distinct !{!2, !"foo"}
+!3 = distinct !{!3, !4}
+!4 = !{!"llvm.loop.mustprogress"}
+!5 = !{!6}
+!6 = distinct !{!6, !2, !"foo: %outer.result"}
+!7 = !{!1, !6}
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2, !"foo: %outer.result"}
+; CHECK: !2 = distinct !{!2, !"foo"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2, !"foo: %inner.result:Peel0"}
+; CHECK: !5 = !{!4, !1}
+; CHECK: !6 = !{!7}
+; CHECK: !7 = distinct !{!7, !2, !"foo: %inner.result"}
+; CHECK: !8 = !{!7, !1}
+; CHECK: !9 = distinct !{!9, !10, !11, !12}
+; CHECK: !10 = !{!"llvm.loop.mustprogress"}
+; CHECK: !11 = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: !12 = !{!"llvm.loop.unroll.disable"}

From 66b319327bce68377c700b2b57a109498c5500bd Mon Sep 17 00:00:00 2001
From: Jeroen Dobbelaere <jeroen.dobbelaere@synopsys.com>
Date: Tue, 2 Feb 2021 17:55:06 +0100
Subject: [PATCH 091/244] [InlineFunction] Only update noalias scopes once for
 an instruction.

Inlining sometimes maps different instructions to be inlined onto the same instruction.

We must ensure that the noalias scopes are only remapped once. Otherwise the scope might disappear (at best).
This patch ensures that we only replace scopes for which the mapping is known.

This approach is preferred over tracking which instructions we already handled in a SmallPtrSet,
as that one will need more memory.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D95862

(cherry picked from commit 50c523a9d4402c69d59c0b2ecb383a763d16cde9)
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 12 +++++--
 llvm/test/Transforms/Inline/noalias3.ll      | 35 ++++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/noalias3.ll

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 0ac8fa537f4e..3026342cc4a6 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -921,14 +921,20 @@ void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
     if (!I)
       continue;
 
+    // Only update scopes when we find them in the map. If they are not, it is
+    // because we already handled that instruction before. This is faster than
+    // tracking which instructions we already updated.
     if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
-      I->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]);
+      if (MDNode *MNew = MDMap.lookup(M))
+        I->setMetadata(LLVMContext::MD_alias_scope, MNew);
 
     if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
-      I->setMetadata(LLVMContext::MD_noalias, MDMap[M]);
+      if (MDNode *MNew = MDMap.lookup(M))
+        I->setMetadata(LLVMContext::MD_noalias, MNew);
 
     if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
-      Decl->setScopeList(MDMap[Decl->getScopeList()]);
+      if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+        Decl->setScopeList(MNew);
   }
 }
 
diff --git a/llvm/test/Transforms/Inline/noalias3.ll b/llvm/test/Transforms/Inline/noalias3.ll
new file mode 100644
index 000000000000..b94cbd6ab72f
--- /dev/null
+++ b/llvm/test/Transforms/Inline/noalias3.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt -inline -S < %s | FileCheck %s
+
+define void @caller(i8* %ptr) {
+; CHECK-LABEL: define {{[^@]+}}@caller
+; CHECK-SAME: (i8* [[PTR:%.*]]) {
+; CHECK-NEXT:    [[I_I:%.*]] = load i8, i8* [[PTR]], align 1, !alias.scope !0
+; CHECK-NEXT:    ret void
+;
+  call void @callee(i8* %ptr)
+  ret void
+}
+
+define void @callee(i8* %ptr) {
+; CHECK-LABEL: define {{[^@]+}}@callee
+; CHECK-SAME: (i8* [[PTR:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[PTR]], align 1, !alias.scope !3
+; CHECK-NEXT:    br label [[DUMMY:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    [[I_COPY:%.*]] = phi i8 [ [[I]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %i = load i8, i8* %ptr, !alias.scope !0
+  br label %dummy
+
+dummy:
+  %i.copy = phi i8 [ %i, %entry ]
+  ret void
+}
+
+!0 = !{!1}
+!1 = distinct !{!1, !2}
+!2 = distinct !{!2}

From 54b68d56dc957457b54c700af8e24a2c86539cc3 Mon Sep 17 00:00:00 2001
From: Nathan James <n.james93@hotmail.co.uk>
Date: Wed, 3 Feb 2021 05:11:28 +0000
Subject: [PATCH 092/244] [clang-tidy] Fix crash in
 readability-identifier-naming check

`isParamInMainLikeFunction` didn't check whether the function had an identifier name before calling getName(), which could lead to an assertion failure.

(cherry picked from commit c97592c5df09850404a9ddbfb614c7df271d1dfe)
---
 .../clang-tidy/readability/IdentifierNamingCheck.cpp          | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
index d33040a00e15..867b074ca6db 100644
--- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
@@ -352,6 +352,10 @@ static bool isParamInMainLikeFunction(const ParmVarDecl &ParmDecl,
     return false;
   if (FDecl->getAccess() != AS_public && FDecl->getAccess() != AS_none)
     return false;
+  // If the function doesn't have a name thats an identifier, can occur of the
+  // function is an operator overload, bail out early.
+  if (!FDecl->getDeclName().isIdentifier())
+    return false;
   enum MainType { None, Main, WMain };
   auto IsCharPtrPtr = [](QualType QType) -> MainType {
     if (QType.isNull())

From 72db3a9104a4f1b80fc26c597423ee7c66bd350d Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 15 Feb 2021 11:40:39 -0800
Subject: [PATCH 093/244] workflows: Increase the fetch-depth for
 actions/checkout steps

This avoids failures when many commits are pushed close together.
---
 .github/workflows/clang-tests.yml        | 2 +-
 .github/workflows/libclang-abi-tests.yml | 2 +-
 .github/workflows/libclc-tests.yml       | 2 +-
 .github/workflows/lld-tests.yml          | 2 +-
 .github/workflows/lldb-tests.yml         | 2 +-
 .github/workflows/llvm-tests.yml         | 4 ++--
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml
index af0b5eabeeda..d37637e4b927 100644
--- a/.github/workflows/clang-tests.yml
+++ b/.github/workflows/clang-tests.yml
@@ -35,7 +35,7 @@ jobs:
       uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
-        fetch-depth: 1
+        fetch-depth: 250
     - name: Test clang
       uses: llvm/actions/build-test-llvm-project@main
       with:
diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml
index 320a88c1d407..ed54c4a1e54d 100644
--- a/.github/workflows/libclang-abi-tests.yml
+++ b/.github/workflows/libclang-abi-tests.yml
@@ -28,7 +28,7 @@ jobs:
       - name: Checkout source
         uses: actions/checkout@v1
         with:
-          fetch-depth: 1
+          fetch-depth: 250
 
       - name: Get LLVM version
         id: version
diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml
index 188eecfc3b89..6be01c839f44 100644
--- a/.github/workflows/libclc-tests.yml
+++ b/.github/workflows/libclc-tests.yml
@@ -38,7 +38,7 @@ jobs:
       uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
-        fetch-depth: 1
+        fetch-depth: 250
     - name: Build clang
       uses: llvm/actions/build-test-llvm-project@main
       with:
diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml
index bdf0c2fcd886..1e5540d2fc4d 100644
--- a/.github/workflows/lld-tests.yml
+++ b/.github/workflows/lld-tests.yml
@@ -35,7 +35,7 @@ jobs:
       uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
-        fetch-depth: 1
+        fetch-depth: 250
     - name: Test lld
       uses: llvm/actions/build-test-llvm-project@main
       with:
diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml
index 68aec6036995..1658c0e001a0 100644
--- a/.github/workflows/lldb-tests.yml
+++ b/.github/workflows/lldb-tests.yml
@@ -40,7 +40,7 @@ jobs:
       uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
-        fetch-depth: 1
+        fetch-depth: 250
     - name: Build lldb
       uses: llvm/actions/build-test-llvm-project@main
       with:
diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml
index 1fcd67a10078..9017a014be02 100644
--- a/.github/workflows/llvm-tests.yml
+++ b/.github/workflows/llvm-tests.yml
@@ -33,7 +33,7 @@ jobs:
       uses: llvm/actions/install-ninja@main
     - uses: actions/checkout@v1
       with:
-        fetch-depth: 1
+        fetch-depth: 250
     - name: Test llvm
       uses: llvm/actions/build-test-llvm-project@main
       with:
@@ -52,7 +52,7 @@ jobs:
       - name: Checkout source
         uses: actions/checkout@v1
         with:
-          fetch-depth: 1
+          fetch-depth: 250
 
       - name: Get LLVM version
         id: version

From 3fe28ce26a3302c799ad731b9b0a8408bf553aef Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Sat, 6 Feb 2021 11:42:02 -0600
Subject: [PATCH 094/244] [AssumptionCache] Do not track llvm.assume calls
 (PR49043)

This fixes PR49043 by invalidating the handle on RAUW. This will work
fine assuming all existing RAUW users add the new assumption to the
cache. That means, if a new llvm.assume call replaces an old one, you
need to add the new one now as a RAUW is not enough anymore.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D96208

(cherry picked from commit 378f4e5ec26c3e0d2119c1112ec645b369eed2de)
---
 llvm/include/llvm/Analysis/AssumptionCache.h |  2 +-
 llvm/test/Transforms/GVNSink/assumption.ll   | 32 ++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/GVNSink/assumption.ll

diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
index 0ef63dc68e1c..c4602d3449c0 100644
--- a/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -45,7 +45,7 @@ class AssumptionCache {
   enum : unsigned { ExprResultIdx = std::numeric_limits<unsigned>::max() };
 
   struct ResultElem {
-    WeakTrackingVH Assume;
+    WeakVH Assume;
 
     /// contains either ExprResultIdx or the index of the operand bundle
     /// containing the knowledge.
diff --git a/llvm/test/Transforms/GVNSink/assumption.ll b/llvm/test/Transforms/GVNSink/assumption.ll
new file mode 100644
index 000000000000..6b3d832435dc
--- /dev/null
+++ b/llvm/test/Transforms/GVNSink/assumption.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -S -passes="print<assumptions>,gvn-sink,loop-unroll" -unroll-count=3 | FileCheck %s
+;
+; This crashed because the cached assumption was replaced and the replacement
+; was then in the cache twice.
+;
+; PR49043
+
+@g = external global i32
+
+define void @main() {
+bb:
+  %i1.i = load volatile i32, i32* @g
+  %i32.i = icmp eq i32 %i1.i, 0
+  call void @llvm.assume(i1 %i32.i) #3
+  br label %bb4.i
+
+bb4.i:                                            ; preds = %bb4.i, %bb
+  %i.i = load volatile i32, i32* @g
+  %i3.i = icmp eq i32 %i.i, 0
+  call void @llvm.assume(i1 %i3.i) #3
+  br label %bb4.i
+
+func_1.exit:                                      ; No predecessors!
+  unreachable
+}
+
+declare void @llvm.assume(i1)
+
+; CHECK:  call void @llvm.assume(
+; CHECK:  call void @llvm.assume(
+; CHECK:  call void @llvm.assume(
+

From 343ba9730b7d2166b5ce61f770413782573ea224 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Tue, 2 Feb 2021 17:24:53 -0600
Subject: [PATCH 095/244] [OpenMP][NFC] Pre-commit test changes regarding
 PR48933

This will highlight the effective changes in subsequent commits.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D95903

(cherry picked from commit 3b2f19d0bc2803697526191a8a607efa0b38f7e4)
---
 .../nvptx_unsupported_type_messages.cpp       | 128 +++++++++++++++++-
 1 file changed, 126 insertions(+), 2 deletions(-)

diff --git a/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp b/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
index 814a4756c01b..0601728caefe 100644
--- a/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
+++ b/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
@@ -77,11 +77,135 @@ T1 bar1() {
 void baz1() {
   T1 t = bar1();
 }
+
+// TODO: We should not emit an error for dead functions we do not emit.
+inline void dead_inline_declare_target() {
+// expected-note@+1 {{'b' defined here}}
+  long double *a, b = 0;
+// expected-error@+1 {{'b' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  a = &b;
+}
+// TODO: We should not emit an error for dead functions we do not emit.
+static void dead_static_declare_target() {
+// expected-note@+1 {{'b' defined here}}
+  long double *a, b = 0;
+// expected-error@+1 {{'b' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  a = &b;
+}
+template<bool>
+void dead_template_declare_target() {
+  long double *a, b = 0;
+  a = &b;
+}
+
+// TODO: We should diagnose the return type and argument type here.
+long double ld_return1a() { return 0; }
+void ld_arg1a(long double ld) {}
+
+// TODO: We should diagnose the return type and argument type here.
+typedef long double ld_ty;
+ld_ty ld_return1b() { return 0; }
+void ld_arg1b(ld_ty ld) {}
+
+static long double ld_return1c() { return 0; }
+static void ld_arg1c(long double ld) {}
+
+inline long double ld_return1d() { return 0; }
+inline void ld_arg1d(long double ld) {}
+
+// expected-note@+1 {{'ld_return1e' defined here}}
+static long double ld_return1e() { return 0; }
+// expected-note@+1 {{'ld_arg1e' defined here}}
+static void ld_arg1e(long double ld) {}
+
+// expected-note@+1 {{'ld_return1f' defined here}}
+inline long double ld_return1f() { return 0; }
+// expected-note@+1 {{'ld_arg1f' defined here}}
+inline void ld_arg1f(long double ld) {}
+
+inline void ld_use1() {
+// expected-note@+1 {{'ld' defined here}}
+  long double ld = 0;
+// TODO: We should not diagnose this as the function is dead.
+// expected-error@+1 {{'ld' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  ld += 1;
+}
+static void ld_use2() {
+// expected-note@+1 {{'ld' defined here}}
+  long double ld = 0;
+// TODO: We should not diagnose this as the function is dead.
+// expected-error@+1 {{'ld' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  ld += 1;
+}
+
+inline void ld_use3() {
+// expected-note@+1 {{'ld' defined here}}
+  long double ld = 0;
+// expected-error@+1 {{'ld' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  ld += 1;
+}
+static void ld_use4() {
+// expected-note@+1 {{'ld' defined here}}
+  long double ld = 0;
+// expected-error@+1 {{'ld' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  ld += 1;
+}
+
+void external() {
+// expected-error@+1 {{'ld_return1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  void *p1 = reinterpret_cast<void*>(&ld_return1e);
+// expected-error@+1 {{'ld_arg1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  void *p2 = reinterpret_cast<void*>(&ld_arg1e);
+// expected-error@+1 {{'ld_return1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  void *p3 = reinterpret_cast<void*>(&ld_return1f);
+// expected-error@+1 {{'ld_arg1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  void *p4 = reinterpret_cast<void*>(&ld_arg1f);
+  void *p5 = reinterpret_cast<void*>(&ld_use3);
+  void *p6 = reinterpret_cast<void*>(&ld_use4);
+}
+
+#ifndef _ARCH_PPC
+// TODO: We should diagnose the return type and argument type here.
+__float128 ld_return2a() { return 0; }
+void ld_arg2a(__float128 ld) {}
+
+// TODO: We should diagnose the return type and argument type here.
+typedef __float128 fp128_ty;
+fp128_ty ld_return2b() { return 0; }
+void ld_arg2b(fp128_ty ld) {}
+#endif
+
 #pragma omp end declare target
 
+// TODO: There should not be an error here, dead_inline is never emitted.
+// expected-note@+1 3{{'f' defined here}}
+inline long double dead_inline(long double f) {
+#pragma omp target map(f)
+// TODO: We should not emit the same error message 3 times, here and elsewhere in this file.
+  // expected-error@+1 3{{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  f = 1;
+  return f;
+}
+
+// TODO: There should not be an error here, dead_static is never emitted.
+// expected-note@+1 3{{'f' defined here}}
+static long double dead_static(long double f) {
+#pragma omp target map(f)
+  // expected-error@+1 3{{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  f = 1;
+  return f;
+}
+
+template<typename T>
+long double dead_template(long double f) {
+#pragma omp target map(f)
+  f = 1;
+  return f;
+}
+
 #ifndef _ARCH_PPC
 // expected-note@+1 3{{'f' defined here}}
-__float128 foo1(__float128 f) {
+__float128 foo2(__float128 f) {
 #pragma omp target map(f)
   // expected-error@+1 3{{'f' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   f = 1;
@@ -89,7 +213,7 @@ __float128 foo1(__float128 f) {
 }
 #else
 // expected-note@+1 3{{'f' defined here}}
-long double foo1(long double f) {
+long double foo3(long double f) {
 #pragma omp target map(f)
   // expected-error@+1 3{{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   f = 1;

From 3b9ea2dc8eeb3c3893213f57d532b32ef3619859 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Fri, 29 Jan 2021 02:42:20 -0600
Subject: [PATCH 096/244] [OpenMP] Attribute target diagnostics properly

Type errors in function declarations were not (always) diagnosed prior
to this patch. Furthermore, certain remarks did not get associated
properly which caused them to be emitted multiple times.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95912

(cherry picked from commit f9286b434b764b366f1aad9249c04e7741ed5518)
---
 clang/include/clang/Sema/Sema.h               | 16 +++--
 clang/lib/Sema/Sema.cpp                       | 36 ++++++----
 clang/lib/Sema/SemaDecl.cpp                   |  3 +
 clang/lib/Sema/SemaExpr.cpp                   |  2 +-
 clang/lib/Sema/SemaOpenMP.cpp                 | 13 ++--
 .../nvptx_unsupported_type_messages.cpp       | 65 +++++++++++++------
 6 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 7f7c84eb1b1d..42814f6ba8f6 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11948,8 +11948,8 @@ class Sema final {
   ///  if (diagIfOpenMPDeviceCode(Loc, diag::err_vla_unsupported))
   ///    return ExprError();
   ///  // Otherwise, continue parsing as normal.
-  SemaDiagnosticBuilder diagIfOpenMPDeviceCode(SourceLocation Loc,
-                                               unsigned DiagID);
+  SemaDiagnosticBuilder
+  diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID, FunctionDecl *FD);
 
   /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
   /// context is "used as host code".
@@ -11965,17 +11965,19 @@ class Sema final {
   ///    return ExprError();
   ///  // Otherwise, continue parsing as normal.
   SemaDiagnosticBuilder diagIfOpenMPHostCode(SourceLocation Loc,
-                                             unsigned DiagID);
+                                             unsigned DiagID, FunctionDecl *FD);
 
-  SemaDiagnosticBuilder targetDiag(SourceLocation Loc, unsigned DiagID);
+  SemaDiagnosticBuilder targetDiag(SourceLocation Loc, unsigned DiagID,
+                                   FunctionDecl *FD = nullptr);
   SemaDiagnosticBuilder targetDiag(SourceLocation Loc,
-                                   const PartialDiagnostic &PD) {
-    return targetDiag(Loc, PD.getDiagID()) << PD;
+                                   const PartialDiagnostic &PD,
+                                   FunctionDecl *FD = nullptr) {
+    return targetDiag(Loc, PD.getDiagID(), FD) << PD;
   }
 
   /// Check if the expression is allowed to be used in expressions for the
   /// offloading devices.
-  void checkDeviceDecl(const ValueDecl *D, SourceLocation Loc);
+  void checkDeviceDecl(ValueDecl *D, SourceLocation Loc);
 
   enum CUDAFunctionTarget {
     CFT_Device,
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index cb5a84a31235..450f9c020f7f 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -14,6 +14,7 @@
 #include "UsedDeclVisitor.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTDiagnostic.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclFriend.h"
 #include "clang/AST/DeclObjC.h"
@@ -1740,11 +1741,12 @@ Sema::SemaDiagnosticBuilder::~SemaDiagnosticBuilder() {
   }
 }
 
-Sema::SemaDiagnosticBuilder Sema::targetDiag(SourceLocation Loc,
-                                             unsigned DiagID) {
+Sema::SemaDiagnosticBuilder
+Sema::targetDiag(SourceLocation Loc, unsigned DiagID, FunctionDecl *FD) {
+  FD = FD ? FD : getCurFunctionDecl();
   if (LangOpts.OpenMP)
-    return LangOpts.OpenMPIsDevice ? diagIfOpenMPDeviceCode(Loc, DiagID)
-                                   : diagIfOpenMPHostCode(Loc, DiagID);
+    return LangOpts.OpenMPIsDevice ? diagIfOpenMPDeviceCode(Loc, DiagID, FD)
+                                   : diagIfOpenMPHostCode(Loc, DiagID, FD);
   if (getLangOpts().CUDA)
     return getLangOpts().CUDAIsDevice ? CUDADiagIfDeviceCode(Loc, DiagID)
                                       : CUDADiagIfHostCode(Loc, DiagID);
@@ -1753,7 +1755,7 @@ Sema::SemaDiagnosticBuilder Sema::targetDiag(SourceLocation Loc,
     return SYCLDiagIfDeviceCode(Loc, DiagID);
 
   return SemaDiagnosticBuilder(SemaDiagnosticBuilder::K_Immediate, Loc, DiagID,
-                               getCurFunctionDecl(), *this);
+                               FD, *this);
 }
 
 Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID,
@@ -1772,15 +1774,14 @@ Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID,
                                  DiagID, getCurFunctionDecl(), *this);
   }
 
-  SemaDiagnosticBuilder DB =
-      getLangOpts().CUDAIsDevice
-          ? CUDADiagIfDeviceCode(Loc, DiagID)
-          : CUDADiagIfHostCode(Loc, DiagID);
+  SemaDiagnosticBuilder DB = getLangOpts().CUDAIsDevice
+                                 ? CUDADiagIfDeviceCode(Loc, DiagID)
+                                 : CUDADiagIfHostCode(Loc, DiagID);
   SetIsLastErrorImmediate(DB.isImmediate());
   return DB;
 }
 
-void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
+void Sema::checkDeviceDecl(ValueDecl *D, SourceLocation Loc) {
   if (isUnevaluatedContext())
     return;
 
@@ -1798,13 +1799,17 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
         return;
   }
 
+  // Try to associate errors with the lexical context, if that is a function, or
+  // the value declaration otherwise.
+  FunctionDecl *FD =
+      isa<FunctionDecl>(C) ? cast<FunctionDecl>(C) : dyn_cast<FunctionDecl>(D);
   auto CheckType = [&](QualType Ty) {
     if (Ty->isDependentType())
       return;
 
     if (Ty->isExtIntType()) {
       if (!Context.getTargetInfo().hasExtIntType()) {
-        targetDiag(Loc, diag::err_device_unsupported_type)
+        targetDiag(Loc, diag::err_device_unsupported_type, FD)
             << D << false /*show bit size*/ << 0 /*bitsize*/
             << Ty << Context.getTargetInfo().getTriple().str();
       }
@@ -1817,11 +1822,12 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
          !Context.getTargetInfo().hasFloat128Type()) ||
         (Ty->isIntegerType() && Context.getTypeSize(Ty) == 128 &&
          !Context.getTargetInfo().hasInt128Type())) {
-      targetDiag(Loc, diag::err_device_unsupported_type)
+      if (targetDiag(Loc, diag::err_device_unsupported_type, FD)
           << D << true /*show bit size*/
           << static_cast<unsigned>(Context.getTypeSize(Ty)) << Ty
-          << Context.getTargetInfo().getTriple().str();
-      targetDiag(D->getLocation(), diag::note_defined_here) << D;
+          << Context.getTargetInfo().getTriple().str())
+        D->setInvalidDecl();
+      targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
     }
   };
 
@@ -1833,6 +1839,8 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
       CheckType(ParamTy);
     CheckType(FPTy->getReturnType());
   }
+  if (const auto *FNPTy = dyn_cast<FunctionNoProtoType>(Ty))
+    CheckType(FNPTy->getReturnType());
 }
 
 /// Looks through the macro-expansion chain for the given
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 3ee0c43097d7..6457c6d024cf 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9420,6 +9420,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
     }
   }
 
+  if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice))
+    checkDeviceDecl(NewFD, D.getBeginLoc());
+
   if (!getLangOpts().CPlusPlus) {
     // Perform semantic checking on the function declaration.
     if (!NewFD->isInvalidDecl() && NewFD->isMain())
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 45616dadcbee..ae8508d6c601 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -373,7 +373,7 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
   }
 
   if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) {
-    if (const auto *VD = dyn_cast<ValueDecl>(D))
+    if (auto *VD = dyn_cast<ValueDecl>(D))
       checkDeviceDecl(VD, Loc);
 
     if (!Context.getTargetInfo().isTLSSupported())
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 78707484f588..596aa4b7b5c1 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1898,11 +1898,11 @@ enum class FunctionEmissionStatus {
 } // anonymous namespace
 
 Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc,
-                                                         unsigned DiagID) {
+                                                         unsigned DiagID,
+                                                         FunctionDecl *FD) {
   assert(LangOpts.OpenMP && LangOpts.OpenMPIsDevice &&
          "Expected OpenMP device compilation.");
 
-  FunctionDecl *FD = getCurFunctionDecl();
   SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop;
   if (FD) {
     FunctionEmissionStatus FES = getEmissionStatus(FD);
@@ -1925,14 +1925,15 @@ Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc,
     }
   }
 
-  return SemaDiagnosticBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this);
+  return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this);
 }
 
 Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc,
-                                                       unsigned DiagID) {
+                                                       unsigned DiagID,
+                                                       FunctionDecl *FD) {
   assert(LangOpts.OpenMP && !LangOpts.OpenMPIsDevice &&
          "Expected OpenMP host compilation.");
-  FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl());
+  FunctionEmissionStatus FES = getEmissionStatus(FD);
   SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop;
   switch (FES) {
   case FunctionEmissionStatus::Emitted:
@@ -1948,7 +1949,7 @@ Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc,
     break;
   }
 
-  return SemaDiagnosticBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this);
+  return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this);
 }
 
 static OpenMPDefaultmapClauseKind
diff --git a/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp b/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
index 0601728caefe..1b89a891887d 100644
--- a/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
+++ b/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
@@ -39,10 +39,12 @@ struct T1 {
 };
 
 #ifndef _ARCH_PPC
-// expected-note@+1 {{'boo' defined here}}
+// expected-error@+2 {{'boo' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+// expected-note@+1 2{{'boo' defined here}}
 void boo(__float128 A) { return; }
 #else
-// expected-note@+1 {{'boo' defined here}}
+// expected-error@+2 {{'boo' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+// expected-note@+1 2{{'boo' defined here}}
 void boo(long double A) { return; }
 #endif
 #pragma omp declare target
@@ -51,10 +53,11 @@ T f = a;
 void foo(T a = T()) {
   a = a + f; // expected-note {{called by 'foo'}}
 #ifndef _ARCH_PPC
-// expected-error@+4 {{'boo' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+// expected-error@+5 {{'boo' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 #else
-// expected-error@+2 {{'boo' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+// expected-error@+3 {{'boo' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 #endif
+// expected-note@+1 {{called by 'foo'}}
   boo(0);
   return;
 }
@@ -98,28 +101,49 @@ void dead_template_declare_target() {
   a = &b;
 }
 
-// TODO: We should diagnose the return type and argument type here.
+// expected-note@+2 {{'ld_return1a' defined here}}
+// expected-error@+1 {{'ld_return1a' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 long double ld_return1a() { return 0; }
+// expected-note@+2 {{'ld_arg1a' defined here}}
+// expected-error@+1 {{'ld_arg1a' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 void ld_arg1a(long double ld) {}
 
 // TODO: We should diagnose the return type and argument type here.
 typedef long double ld_ty;
+// expected-note@+2 {{'ld_return1b' defined here}}
+// expected-error@+1 {{'ld_return1b' requires 128 bit size 'ld_ty' (aka 'long double') type support, but device 'nvptx64-unknown-unknown' does not support it}}
 ld_ty ld_return1b() { return 0; }
+// expected-note@+2 {{'ld_arg1b' defined here}}
+// expected-error@+1 {{'ld_arg1b' requires 128 bit size 'ld_ty' (aka 'long double') type support, but device 'nvptx64-unknown-unknown' does not support it}}
 void ld_arg1b(ld_ty ld) {}
 
+// TODO: These errors should not be emitted.
+// expected-note@+2 {{'ld_return1c' defined here}}
+// expected-error@+1 {{'ld_return1c' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 static long double ld_return1c() { return 0; }
+// expected-note@+2 {{'ld_arg1c' defined here}}
+// expected-error@+1 {{'ld_arg1c' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 static void ld_arg1c(long double ld) {}
 
+// TODO: These errors should not be emitted.
+// expected-note@+2 {{'ld_return1d' defined here}}
+// expected-error@+1 {{'ld_return1d' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 inline long double ld_return1d() { return 0; }
+// expected-note@+2 {{'ld_arg1d' defined here}}
+// expected-error@+1 {{'ld_arg1d' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 inline void ld_arg1d(long double ld) {}
 
+// expected-error@+2 {{'ld_return1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_return1e' defined here}}
 static long double ld_return1e() { return 0; }
+// expected-error@+2 {{'ld_arg1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_arg1e' defined here}}
 static void ld_arg1e(long double ld) {}
 
+// expected-error@+2 {{'ld_return1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_return1f' defined here}}
 inline long double ld_return1f() { return 0; }
+// expected-error@+2 {{'ld_arg1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_arg1f' defined here}}
 inline void ld_arg1f(long double ld) {}
 
@@ -152,46 +176,47 @@ static void ld_use4() {
 }
 
 void external() {
-// expected-error@+1 {{'ld_return1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p1 = reinterpret_cast<void*>(&ld_return1e);
-// expected-error@+1 {{'ld_arg1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p2 = reinterpret_cast<void*>(&ld_arg1e);
-// expected-error@+1 {{'ld_return1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p3 = reinterpret_cast<void*>(&ld_return1f);
-// expected-error@+1 {{'ld_arg1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p4 = reinterpret_cast<void*>(&ld_arg1f);
   void *p5 = reinterpret_cast<void*>(&ld_use3);
   void *p6 = reinterpret_cast<void*>(&ld_use4);
 }
 
 #ifndef _ARCH_PPC
-// TODO: We should diagnose the return type and argument type here.
+// expected-note@+2 {{'ld_return2a' defined here}}
+// expected-error@+1 {{'ld_return2a' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 __float128 ld_return2a() { return 0; }
+// expected-note@+2 {{'ld_arg2a' defined here}}
+// expected-error@+1 {{'ld_arg2a' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 void ld_arg2a(__float128 ld) {}
 
-// TODO: We should diagnose the return type and argument type here.
 typedef __float128 fp128_ty;
+// expected-note@+2 {{'ld_return2b' defined here}}
+// expected-error@+1 {{'ld_return2b' requires 128 bit size 'fp128_ty' (aka '__float128') type support, but device 'nvptx64-unknown-unknown' does not support it}}
 fp128_ty ld_return2b() { return 0; }
+// expected-note@+2 {{'ld_arg2b' defined here}}
+// expected-error@+1 {{'ld_arg2b' requires 128 bit size 'fp128_ty' (aka '__float128') type support, but device 'nvptx64-unknown-unknown' does not support it}}
 void ld_arg2b(fp128_ty ld) {}
 #endif
 
 #pragma omp end declare target
 
 // TODO: There should not be an error here, dead_inline is never emitted.
-// expected-note@+1 3{{'f' defined here}}
+// expected-note@+1 {{'f' defined here}}
 inline long double dead_inline(long double f) {
 #pragma omp target map(f)
-// TODO: We should not emit the same error message 3 times, here and elsewhere in this file.
-  // expected-error@+1 3{{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  // expected-error@+1 {{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   f = 1;
   return f;
 }
 
 // TODO: There should not be an error here, dead_static is never emitted.
-// expected-note@+1 3{{'f' defined here}}
+// expected-note@+1 {{'f' defined here}}
 static long double dead_static(long double f) {
 #pragma omp target map(f)
-  // expected-error@+1 3{{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  // expected-error@+1 {{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   f = 1;
   return f;
 }
@@ -204,18 +229,18 @@ long double dead_template(long double f) {
 }
 
 #ifndef _ARCH_PPC
-// expected-note@+1 3{{'f' defined here}}
+// expected-note@+1 {{'f' defined here}}
 __float128 foo2(__float128 f) {
 #pragma omp target map(f)
-  // expected-error@+1 3{{'f' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  // expected-error@+1 {{'f' requires 128 bit size '__float128' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   f = 1;
   return f;
 }
 #else
-// expected-note@+1 3{{'f' defined here}}
+// expected-note@+1 {{'f' defined here}}
 long double foo3(long double f) {
 #pragma omp target map(f)
-  // expected-error@+1 3{{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
+  // expected-error@+1 {{'f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   f = 1;
   return f;
 }

From 0d14528f8082ba12e34f41cbf931a7e7425b694a Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Tue, 2 Feb 2021 11:17:44 -0600
Subject: [PATCH 097/244] [OpenMP] Delay more diagnostics of potentially
 non-emitted code

Even code in target and declare target regions might not be emitted.
With this patch we delay more diagnostics and use laziness and linkage
to determine if a function is emitted (for the device). Note that we
still eagerly emit diagnostics for target regions, unfortunately, see
the TODO for the reason.

This hopefully fixes PR48933.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D95928

(cherry picked from commit 1dd66e6111a8247c6c7931143251c0cf1442b905)
---
 clang/lib/Sema/SemaDecl.cpp                   | 85 +++++++++----------
 clang/lib/Sema/SemaOpenMP.cpp                 | 10 ++-
 clang/test/OpenMP/nvptx_allocate_messages.cpp |  3 +-
 .../nvptx_target_exceptions_messages.cpp      |  1 +
 .../nvptx_unsupported_type_messages.cpp       | 34 ++------
 5 files changed, 59 insertions(+), 74 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 6457c6d024cf..1f7ab49ccdd7 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -18332,42 +18332,51 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD,
   if (FD->isDependentContext())
     return FunctionEmissionStatus::TemplateDiscarded;
 
-  FunctionEmissionStatus OMPES = FunctionEmissionStatus::Unknown;
+  // Check whether this function is an externally visible definition.
+  auto IsEmittedForExternalSymbol = [this, FD]() {
+    // We have to check the GVA linkage of the function's *definition* -- if we
+    // only have a declaration, we don't know whether or not the function will
+    // be emitted, because (say) the definition could include "inline".
+    FunctionDecl *Def = FD->getDefinition();
+
+    return Def && !isDiscardableGVALinkage(
+                      getASTContext().GetGVALinkageForFunction(Def));
+  };
+
   if (LangOpts.OpenMPIsDevice) {
+    // In OpenMP device mode we will not emit host only functions, or functions
+    // we don't need due to their linkage.
     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
         OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
-    if (DevTy.hasValue()) {
+    // DevTy may be changed later by
+    //  #pragma omp declare target to(*) device_type(*).
+    // Therefore DevTyhaving no value does not imply host. The emission status
+    // will be checked again at the end of compilation unit with Final = true.
+    if (DevTy.hasValue())
       if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host)
-        OMPES = FunctionEmissionStatus::OMPDiscarded;
-      else if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost ||
-               *DevTy == OMPDeclareTargetDeclAttr::DT_Any) {
-        OMPES = FunctionEmissionStatus::Emitted;
-      }
-    }
-  } else if (LangOpts.OpenMP) {
-    // In OpenMP 4.5 all the functions are host functions.
-    if (LangOpts.OpenMP <= 45) {
-      OMPES = FunctionEmissionStatus::Emitted;
-    } else {
-      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
-          OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
-      // In OpenMP 5.0 or above, DevTy may be changed later by
-      // #pragma omp declare target to(*) device_type(*). Therefore DevTy
-      // having no value does not imply host. The emission status will be
-      // checked again at the end of compilation unit.
-      if (DevTy.hasValue()) {
-        if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) {
-          OMPES = FunctionEmissionStatus::OMPDiscarded;
-        } else if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host ||
-                   *DevTy == OMPDeclareTargetDeclAttr::DT_Any)
-          OMPES = FunctionEmissionStatus::Emitted;
-      } else if (Final)
-        OMPES = FunctionEmissionStatus::Emitted;
-    }
-  }
-  if (OMPES == FunctionEmissionStatus::OMPDiscarded ||
-      (OMPES == FunctionEmissionStatus::Emitted && !LangOpts.CUDA))
-    return OMPES;
+        return FunctionEmissionStatus::OMPDiscarded;
+    // If we have an explicit value for the device type, or we are in a target
+    // declare context, we need to emit all extern and used symbols.
+    if (isInOpenMPDeclareTargetContext() || DevTy.hasValue())
+      if (IsEmittedForExternalSymbol())
+        return FunctionEmissionStatus::Emitted;
+    // Device mode only emits what it must, if it wasn't tagged yet and needed,
+    // we'll omit it.
+    if (Final)
+      return FunctionEmissionStatus::OMPDiscarded;
+  } else if (LangOpts.OpenMP > 45) {
+    // In OpenMP host compilation prior to 5.0 everything was an emitted host
+    // function. In 5.0, no_host was introduced which might cause a function to
+    // be ommitted.
+    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+        OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
+    if (DevTy.hasValue())
+      if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
+        return FunctionEmissionStatus::OMPDiscarded;
+  }
+
+  if (Final && LangOpts.OpenMP && !LangOpts.CUDA)
+    return FunctionEmissionStatus::Emitted;
 
   if (LangOpts.CUDA) {
     // When compiling for device, host functions are never emitted.  Similarly,
@@ -18381,17 +18390,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD,
         (T == Sema::CFT_Device || T == Sema::CFT_Global))
       return FunctionEmissionStatus::CUDADiscarded;
 
-    // Check whether this function is externally visible -- if so, it's
-    // known-emitted.
-    //
-    // We have to check the GVA linkage of the function's *definition* -- if we
-    // only have a declaration, we don't know whether or not the function will
-    // be emitted, because (say) the definition could include "inline".
-    FunctionDecl *Def = FD->getDefinition();
-
-    if (Def &&
-        !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def))
-        && (!LangOpts.OpenMP || OMPES == FunctionEmissionStatus::Emitted))
+    if (IsEmittedForExternalSymbol())
       return FunctionEmissionStatus::Emitted;
   }
 
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 596aa4b7b5c1..4063c185388d 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1884,8 +1884,7 @@ void Sema::popOpenMPFunctionRegion(const FunctionScopeInfo *OldFSI) {
 static bool isOpenMPDeviceDelayedContext(Sema &S) {
   assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice &&
          "Expected OpenMP device compilation.");
-  return !S.isInOpenMPTargetExecutionDirective() &&
-         !S.isInOpenMPDeclareTargetContext();
+  return !S.isInOpenMPTargetExecutionDirective();
 }
 
 namespace {
@@ -1911,6 +1910,13 @@ Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc,
       Kind = SemaDiagnosticBuilder::K_Immediate;
       break;
     case FunctionEmissionStatus::Unknown:
+      // TODO: We should always delay diagnostics here in case a target
+      //       region is in a function we do not emit. However, as the
+      //       current diagnostics are associated with the function containing
+      //       the target region and we do not emit that one, we would miss out
+      //       on diagnostics for the target region itself. We need to anchor
+      //       the diagnostics with the new generated function *or* ensure we
+      //       emit diagnostics associated with the surrounding function.
       Kind = isOpenMPDeviceDelayedContext(*this)
                  ? SemaDiagnosticBuilder::K_Deferred
                  : SemaDiagnosticBuilder::K_Immediate;
diff --git a/clang/test/OpenMP/nvptx_allocate_messages.cpp b/clang/test/OpenMP/nvptx_allocate_messages.cpp
index a4d78b6ab588..9a61da73eb39 100644
--- a/clang/test/OpenMP/nvptx_allocate_messages.cpp
+++ b/clang/test/OpenMP/nvptx_allocate_messages.cpp
@@ -81,8 +81,7 @@ int main () {
 #endif // DEVICE && !REQUIRES
 #pragma omp allocate(b)
 #if defined(DEVICE) && !defined(REQUIRES)
-// expected-note@+3 {{in instantiation of function template specialization 'foo<int>' requested here}}
-// expected-note@+2 {{called by 'main'}}
+// expected-note@+2 2{{called by 'main'}}
 #endif // DEVICE && !REQUIRES
   return (foo<int>() + bar());
 }
diff --git a/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp b/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp
index c71615d2521f..87ea00a90822 100644
--- a/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp
+++ b/clang/test/OpenMP/nvptx_target_exceptions_messages.cpp
@@ -52,6 +52,7 @@ int maini1() {
 #pragma omp target map(tofrom \
                        : a, b)
   {
+    // expected-note@+1 {{called by 'maini1'}}
     S s(a);
     static long aaa = 23;
     a = foo() + bar() + b + c + d + aa + aaa + FA<int>(); // expected-note{{called by 'maini1'}}
diff --git a/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp b/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
index 1b89a891887d..a319c78f73c5 100644
--- a/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
+++ b/clang/test/OpenMP/nvptx_unsupported_type_messages.cpp
@@ -81,18 +81,12 @@ void baz1() {
   T1 t = bar1();
 }
 
-// TODO: We should not emit an error for dead functions we do not emit.
 inline void dead_inline_declare_target() {
-// expected-note@+1 {{'b' defined here}}
   long double *a, b = 0;
-// expected-error@+1 {{'b' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   a = &b;
 }
-// TODO: We should not emit an error for dead functions we do not emit.
 static void dead_static_declare_target() {
-// expected-note@+1 {{'b' defined here}}
   long double *a, b = 0;
-// expected-error@+1 {{'b' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   a = &b;
 }
 template<bool>
@@ -108,7 +102,6 @@ long double ld_return1a() { return 0; }
 // expected-error@+1 {{'ld_arg1a' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 void ld_arg1a(long double ld) {}
 
-// TODO: We should diagnose the return type and argument type here.
 typedef long double ld_ty;
 // expected-note@+2 {{'ld_return1b' defined here}}
 // expected-error@+1 {{'ld_return1b' requires 128 bit size 'ld_ty' (aka 'long double') type support, but device 'nvptx64-unknown-unknown' does not support it}}
@@ -117,48 +110,28 @@ ld_ty ld_return1b() { return 0; }
 // expected-error@+1 {{'ld_arg1b' requires 128 bit size 'ld_ty' (aka 'long double') type support, but device 'nvptx64-unknown-unknown' does not support it}}
 void ld_arg1b(ld_ty ld) {}
 
-// TODO: These errors should not be emitted.
-// expected-note@+2 {{'ld_return1c' defined here}}
-// expected-error@+1 {{'ld_return1c' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 static long double ld_return1c() { return 0; }
-// expected-note@+2 {{'ld_arg1c' defined here}}
-// expected-error@+1 {{'ld_arg1c' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 static void ld_arg1c(long double ld) {}
 
-// TODO: These errors should not be emitted.
-// expected-note@+2 {{'ld_return1d' defined here}}
-// expected-error@+1 {{'ld_return1d' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 inline long double ld_return1d() { return 0; }
-// expected-note@+2 {{'ld_arg1d' defined here}}
-// expected-error@+1 {{'ld_arg1d' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 inline void ld_arg1d(long double ld) {}
 
-// expected-error@+2 {{'ld_return1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_return1e' defined here}}
 static long double ld_return1e() { return 0; }
-// expected-error@+2 {{'ld_arg1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_arg1e' defined here}}
 static void ld_arg1e(long double ld) {}
 
-// expected-error@+2 {{'ld_return1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_return1f' defined here}}
 inline long double ld_return1f() { return 0; }
-// expected-error@+2 {{'ld_arg1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
 // expected-note@+1 {{'ld_arg1f' defined here}}
 inline void ld_arg1f(long double ld) {}
 
 inline void ld_use1() {
-// expected-note@+1 {{'ld' defined here}}
   long double ld = 0;
-// TODO: We should not diagnose this as the function is dead.
-// expected-error@+1 {{'ld' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   ld += 1;
 }
 static void ld_use2() {
-// expected-note@+1 {{'ld' defined here}}
   long double ld = 0;
-// TODO: We should not diagnose this as the function is dead.
-// expected-error@+1 {{'ld' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   ld += 1;
 }
 
@@ -176,11 +149,18 @@ static void ld_use4() {
 }
 
 void external() {
+// expected-error@+1 {{'ld_return1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p1 = reinterpret_cast<void*>(&ld_return1e);
+// expected-error@+1 {{'ld_arg1e' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p2 = reinterpret_cast<void*>(&ld_arg1e);
+// expected-error@+1 {{'ld_return1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p3 = reinterpret_cast<void*>(&ld_return1f);
+// expected-error@+1 {{'ld_arg1f' requires 128 bit size 'long double' type support, but device 'nvptx64-unknown-unknown' does not support it}}
   void *p4 = reinterpret_cast<void*>(&ld_arg1f);
+// TODO: The error message "called by" is not great.
+// expected-note@+1 {{called by 'external'}}
   void *p5 = reinterpret_cast<void*>(&ld_use3);
+// expected-note@+1 {{called by 'external'}}
   void *p6 = reinterpret_cast<void*>(&ld_use4);
 }
 

From d14016d869acac0d0196bd6b846ab45879ea0fa5 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dimitry@andric.com>
Date: Mon, 15 Feb 2021 18:22:01 +0100
Subject: [PATCH 098/244] Define new/delete in libc++ when using libcxxrt

Always turn on LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS, if libcxxrt is used
as the C++ ABI library, since libcxxrt does not provide the full set
of new and delete operators. In particular, the aligned versions of these
operators are completely missing. This primarily addresses builds on
FreeBSD, as this platform uses libcxxrt by default.

Also, attempt to provide a FreeBSD.cmake cache file, with hopefully sane
settings, partially copied from the Apple.cmake cache file. This needs
more work, probably some additions to ci build scripts (although I am
not aware of any 'official' FreeBSD build bots).

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D96720

(cherry picked from commit 328261019f50a76b11fa625739cbf32ceb2ce2f7)
---
 libcxx/cmake/Modules/HandleLibCXXABI.cmake | 2 ++
 libcxx/cmake/caches/FreeBSD.cmake          | 9 +++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 libcxx/cmake/caches/FreeBSD.cmake

diff --git a/libcxx/cmake/Modules/HandleLibCXXABI.cmake b/libcxx/cmake/Modules/HandleLibCXXABI.cmake
index c5aa26739e36..5d2764e870e9 100644
--- a/libcxx/cmake/Modules/HandleLibCXXABI.cmake
+++ b/libcxx/cmake/Modules/HandleLibCXXABI.cmake
@@ -121,6 +121,8 @@ elseif ("${LIBCXX_CXX_ABI_LIBNAME}" STREQUAL "libcxxrt")
   if(NOT LIBCXX_CXX_ABI_INCLUDE_PATHS)
     set(LIBCXX_CXX_ABI_INCLUDE_PATHS "/usr/include/c++/v1")
   endif()
+  # libcxxrt does not provide aligned new and delete operators
+  set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS ON)
   setup_abi_lib(
     "-DLIBCXXRT"
     "cxxrt" "cxxrt" "cxxabi.h;unwind.h;unwind-arm.h;unwind-itanium.h" ""
diff --git a/libcxx/cmake/caches/FreeBSD.cmake b/libcxx/cmake/caches/FreeBSD.cmake
new file mode 100644
index 000000000000..9e66e379864b
--- /dev/null
+++ b/libcxx/cmake/caches/FreeBSD.cmake
@@ -0,0 +1,9 @@
+set(CMAKE_BUILD_TYPE Release CACHE STRING "")
+set(CMAKE_POSITION_INDEPENDENT_CODE ON CACHE BOOL "")
+
+set(LIBCXX_ENABLE_ASSERTIONS OFF CACHE BOOL "")
+set(LIBCXX_ABI_VERSION "1" CACHE STRING "")
+set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "")
+set(LIBCXX_ENABLE_SHARED ON CACHE BOOL "")
+set(LIBCXX_CXX_ABI libcxxrt CACHE STRING "")
+set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "")

From a29ff5bae41a06302e0c90b47387e3bff8bbdd8c Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Sat, 30 Jan 2021 15:14:41 -0500
Subject: [PATCH 099/244] [OpenMP][NVPTX] Refined CMake logic to choose compute
 capabilities

This patch refines the logic to choose compute capabilities via the
CMake variable `LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES`. It supports the
following values (all case insensitive):
- "all": Build `deviceRTLs` for all supported compute capabilities;
- "auto": Only build for the compute capability auto detected. Note that this
  requires CUDA. If CUDA is not found, a CMake fatal error will be raised.
- "xx,yy" or "xx;yy": Build for compute capabilities `xx` and `yy`.

If `LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES` is not set, it is equivalent to
setting it to `all`.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95687

(cherry picked from commit 26d38f6d20ff137d89cb7c891b739662de1ca508)
---
 .../Modules/LibomptargetGetDependencies.cmake |  4 +--
 .../deviceRTLs/nvptx/CMakeLists.txt           | 30 +++++++++++--------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
index 28165ac1b8c0..e3c2a580396e 100644
--- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
+++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -118,9 +118,7 @@ endif()
 find_package(CUDA QUIET)
 
 # Try to get the highest Nvidia GPU architecture the system supports
-set(LIBOMPTARGET_NVPTX_AUTODETECT_COMPUTE_CAPABILITY TRUE CACHE BOOL
-  "Auto detect CUDA Compute Capability if CUDA is detected.")
-if (CUDA_FOUND AND LIBOMPTARGET_NVPTX_AUTODETECT_COMPUTE_CAPABILITY)
+if (CUDA_FOUND)
   cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS)
   string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH_OUTPUT ${CUDA_ARCH_FLAGS})
   if (NOT DEFINED CUDA_ARCH_MATCH_OUTPUT OR "${CMAKE_MATCH_1}" LESS 35)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index eeda137ef120..b705e0bb6a9f 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -10,12 +10,12 @@
 #
 ##===----------------------------------------------------------------------===##
 
-# By default we will not build NVPTX deviceRTL on a non-CUDA
+# By default we will not build NVPTX deviceRTL on a CUDA free system
 set(LIBOMPTARGET_BUILD_NVPTX_BCLIB FALSE CACHE BOOL
-  "Whether build NVPTX deviceRTL on non-CUDA system.")
+  "Whether build NVPTX deviceRTL on CUDA free system.")
 
 if (NOT (LIBOMPTARGET_DEP_CUDA_FOUND OR LIBOMPTARGET_BUILD_NVPTX_BCLIB))
-  libomptarget_say("Not building NVPTX deviceRTL by default on non-CUDA system.")
+  libomptarget_say("Not building NVPTX deviceRTL by default on CUDA free system.")
   return()
 endif()
 
@@ -73,16 +73,22 @@ set(devicertl_common_directory
 set(devicertl_nvptx_directory
   ${devicertl_base_directory}/nvptx)
 
-if (DEFINED LIBOMPTARGET_DEP_CUDA_ARCH)
-  set(default_capabilities ${LIBOMPTARGET_DEP_CUDA_ARCH})
-else()
-  set(default_capabilities 35 37 50 52 53 60 61 62 70 72 75 80)
-endif()
+set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80)
 
-set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${default_capabilities} CACHE STRING
+set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${all_capabilities} CACHE STRING
   "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
+string(TOLOWER ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES} LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
 
-set(nvptx_sm_list ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES})
+if (LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES STREQUAL "all")
+  set(nvptx_sm_list ${all_capabilities})
+elseif(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES STREQUAL "auto")
+  if (NOT LIBOMPTARGET_DEP_CUDA_FOUND)
+    libomptarget_error_say("[NVPTX] Cannot auto detect compute capability as CUDA not found.")
+  endif()
+  set(nvptx_sm_list ${LIBOMPTARGET_DEP_CUDA_ARCH})
+else()
+  string(REPLACE "," ";" nvptx_sm_list "${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES}")
+endif()
 
 # If user set LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES to empty, we disable the
 # build.
@@ -93,8 +99,8 @@ endif()
 
 # Check all SM values
 foreach(sm ${nvptx_sm_list})
-  if (NOT ${sm} IN_LIST default_capabilities)
-    message(FATAL_ERROR "LIBOMPTARGET-NVPTX: compute capability ${sm} is not supported. Supported values: ${default_capabilities}")
+  if (NOT ${sm} IN_LIST all_capabilities)
+    libomptarget_warning_say("[NVPTX] Compute capability ${sm} is not supported. Make sure clang can work with it.")
   endif()
 endforeach()
 

From a4a4036d7aef942b6ff3a8c594937c0e0f6512b0 Mon Sep 17 00:00:00 2001
From: Zarko Todorovski <zarko@ca.ibm.com>
Date: Fri, 29 Jan 2021 14:05:17 -0500
Subject: [PATCH 100/244] [AIX] Actually push back "-mabi=vec-extabi" when
 option is on.

Accidentally omitted the portion that pushes back the option in
https://reviews.llvm.org/D94986

(cherry picked from commit caaaebcde462bf681498ce85c2659d683a07fc87)
---
 clang/lib/Driver/ToolChains/Clang.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fdb8a58cd1b3..d75eb0c58d8b 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4684,6 +4684,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
           << A->getSpelling() << RawTriple.str();
     if (A->getOption().getID() == options::OPT_mabi_EQ_vec_default)
       D.Diag(diag::err_aix_default_altivec_abi);
+    if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi)
+      CmdArgs.push_back("-mabi=vec-extabi");
   }
 
   if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) {

From db9731e4500533e07ee3a0aaaf44c8f8c00ca2a8 Mon Sep 17 00:00:00 2001
From: Zarko Todorovski <zarko@ca.ibm.com>
Date: Tue, 2 Feb 2021 10:56:15 -0500
Subject: [PATCH 101/244] [AIX] Improve option processing for mabi=vec-extabi
 and mabi=vec-default

Opening this revision to better address comments by @hubert.reinterpretcast in https://reviews.llvm.org/rGcaaaebcde462

Reviewed By: hubert.reinterpretcast

Differential Revision: https://reviews.llvm.org/D95702

(cherry picked from commit eb3426a528d5b3cbbb54aee662a779f2067fc9db)
---
 clang/lib/Driver/ToolChains/Clang.cpp | 12 ++----------
 clang/test/CodeGen/altivec.c          |  3 ---
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d75eb0c58d8b..f8e637974662 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4669,23 +4669,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
-  if (Triple.isOSAIX() && Args.hasArg(options::OPT_maltivec)) {
-    if (Args.getLastArg(options::OPT_mabi_EQ_vec_extabi)) {
-      CmdArgs.push_back("-mabi=vec-extabi");
-    } else {
-      D.Diag(diag::err_aix_default_altivec_abi);
-    }
-  }
-
   if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ_vec_extabi,
                                options::OPT_mabi_EQ_vec_default)) {
     if (!Triple.isOSAIX())
       D.Diag(diag::err_drv_unsupported_opt_for_target)
           << A->getSpelling() << RawTriple.str();
-    if (A->getOption().getID() == options::OPT_mabi_EQ_vec_default)
-      D.Diag(diag::err_aix_default_altivec_abi);
     if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi)
       CmdArgs.push_back("-mabi=vec-extabi");
+    else
+      D.Diag(diag::err_aix_default_altivec_abi);
   }
 
   if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) {
diff --git a/clang/test/CodeGen/altivec.c b/clang/test/CodeGen/altivec.c
index d69c34d82190..86b570f15d08 100644
--- a/clang/test/CodeGen/altivec.c
+++ b/clang/test/CodeGen/altivec.c
@@ -6,9 +6,6 @@
 // RUN: %clang_cc1 -target-feature +altivec -mabi=vec-extabi -target-cpu pwr8 -triple powerpc64-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 // RUN: not %clang_cc1 -target-feature +altivec -mabi=vec-default -target-cpu pwr8 -triple powerpc-unknown-aix -emit-llvm %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR
 // RUN: not %clang_cc1 -target-feature +altivec -mabi=vec-default -target-cpu pwr8 -triple powerpc64-unknown-aix -emit-llvm %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR
-
-// RUN: not %clang -S -emit-llvm -maltivec -mcpu=pwr8 -target powerpc-unknown-aix %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR
-// RUN: not %clang -S -emit-llvm -maltivec -mcpu=pwr8 -target powerpc64-unknown-aix %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR 
 // RUN: %clang -S -emit-llvm -maltivec -mabi=vec-extabi -mcpu=pwr8 -target powerpc-unknown-aix %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 // RUN: %clang -S -emit-llvm -maltivec -mabi=vec-extabi -mcpu=pwr8 -target powerpc64-unknown-aix %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 // RUN: not %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 -triple powerpc-unknown-aix -emit-llvm %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR

From f5b2787d07c1c61ebbda3f11dd770b6c57e2b662 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 4 Feb 2021 09:17:47 -0800
Subject: [PATCH 102/244] [ELF] Allow R_386_GOTOFF from .debug_info

In GCC emitted .debug_info sections, R_386_GOTOFF may be used to
relocate DW_AT_GNU_call_site_value values
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98946).

R_386_GOTOFF (`S + A - GOT`) is one of the `isStaticLinkTimeConstant` relocation
types that is not PC-relative, so it can be used from non-SHF_ALLOC sections. We
currently allow new relocation types as needs arise. The diagnostic has caught
some bugs in the past.

Differential Revision: https://reviews.llvm.org/D95994

(cherry picked from commit b3165a70ae83b46dc145f335dfa9690ece361e92)
---
 lld/ELF/InputSection.cpp     |  5 ++++-
 lld/test/ELF/non-abs-reloc.s | 18 ++++++++++++------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index f40bb258b9af..6f16fc7abc48 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -901,7 +901,10 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
       continue;
     }
 
-    if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) {
+    // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC
+    // sections.
+    if (expr != R_ABS && expr != R_DTPREL && expr != R_GOTPLTREL &&
+        expr != R_RISCV_ADD) {
       std::string msg = getLocation<ELFT>(offset) +
                         ": has non-ABS relocation " + toString(type) +
                         " against symbol '" + toString(sym) + "'";
diff --git a/lld/test/ELF/non-abs-reloc.s b/lld/test/ELF/non-abs-reloc.s
index 72a65424ed1f..82f913efe4d8 100644
--- a/lld/test/ELF/non-abs-reloc.s
+++ b/lld/test/ELF/non-abs-reloc.s
@@ -1,17 +1,17 @@
 // REQUIRES: x86
 // RUN: split-file %s %t
-// RUN: llvm-mc -filetype=obj -triple=x86_64 %t/asm -o %t.o
-// RUN: ld.lld -T %t/lds %t.o -o %t.exe 2>&1 | FileCheck %s
-// CHECK:      warning: {{.*}}.o:(.nonalloc1+0x1): has non-ABS relocation R_X86_64_PC32 against symbol '_start'
-// CHECK-NEXT: warning: {{.*}}.o:(.nonalloc1+0x6): has non-ABS relocation R_X86_64_PC32 against symbol '_start'
+// RUN: llvm-mc -filetype=obj -triple=i386 %t/asm -o %t.o
+// RUN: ld.lld -T %t/lds %t.o -o %t.exe 2>&1 | FileCheck %s --implicit-check-not=warning: --implicit-check-not=error:
+// CHECK:      warning: {{.*}}.o:(.nonalloc1+0x1): has non-ABS relocation R_386_PC32 against symbol '_start'
+// CHECK-NEXT: warning: {{.*}}.o:(.nonalloc1+0x6): has non-ABS relocation R_386_PC32 against symbol '_start'
 
 // RUN: llvm-objdump -D --no-show-raw-insn %t.exe | FileCheck --check-prefix=DISASM %s
 // DISASM:      Disassembly of section .nonalloc:
 // DISASM-EMPTY:
 // DISASM-NEXT: <.nonalloc>:
 // DISASM-NEXT:   0: nop
-// DISASM-NEXT:   1: callq 0x0
-// DISASM-NEXT:   6: callq 0x0
+// DISASM-NEXT:   1: calll 0x0
+// DISASM-NEXT:   6: calll 0x0
 
 //--- lds
 SECTIONS {
@@ -20,6 +20,7 @@ SECTIONS {
 //--- asm
 .globl _start
 _start:
+.L0:
   nop
 
 .section .nonalloc0
@@ -30,3 +31,8 @@ _start:
   .long _start - . - 4
   .byte 0xe8
   .long _start - . - 4
+
+// GCC may relocate DW_AT_GNU_call_site_value with R_386_GOTOFF.
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98946
+.section .debug_info
+  .long .L0@gotoff

From a6ea391b832573830b011f26013ebaa946032250 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 8 Feb 2021 15:24:42 +0200
Subject: [PATCH 103/244] [AArch64] Use '//' as comment string for MSVC
 assembly

As the actual MSVC toolset doesn't use the GAS-style assembly that
Clang/LLVM produces and consumes, there's no reference for what
string to use for e.g. comments when building with a MSVC triple.

This frees up the use of semicolon as separator string, just like
was done for GNU targets in 23413195649d0cf6f3860ae8b5fb115b35032075.
(Previously, both the separator and comment strings were set to
the same, a semicolon.)

Compiler-rt extensively uses separator chars in its assembly,
and that assembly should be buildable with clang-cl for MSVC too.

Differential Revision: https://reviews.llvm.org/D96259

(cherry picked from commit 71c29b4cf3fb2b5610991bfbc12b8bda97d60005)
---
 .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp |   2 +-
 llvm/test/CodeGen/AArch64/cfguard-checks.ll   |   4 +-
 llvm/test/CodeGen/AArch64/landingpad-ifcvt.ll |   2 +-
 .../test/CodeGen/AArch64/reloc-specifiers.mir |   4 +-
 llvm/test/CodeGen/AArch64/seh_funclet_x1.ll   |   2 +-
 llvm/test/CodeGen/AArch64/win64-no-uwtable.ll |   4 +-
 .../CodeGen/AArch64/windows-extern-weak.ll    |   2 +-
 .../CodeGen/AArch64/wineh-try-catch-nobase.ll |   6 +-
 llvm/test/CodeGen/AArch64/wineh-try-catch.ll  |  18 +--
 llvm/test/MC/AArch64/coff-relocations.s       | 110 +++++++++---------
 .../AArch64/{coff-gnu.s => coff-separator.s}  |   2 +
 11 files changed, 79 insertions(+), 77 deletions(-)
 rename llvm/test/MC/AArch64/{coff-gnu.s => coff-separator.s} (74%)

diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 37c924d879b1..68c721cb0d72 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -111,7 +111,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
   SupportsDebugInformation = true;
   CodePointerSize = 8;
 
-  CommentString = ";";
+  CommentString = "//";
   ExceptionsType = ExceptionHandling::WinEH;
   WinEHEncodingType = WinEH::EncodingType::Itanium;
 }
diff --git a/llvm/test/CodeGen/AArch64/cfguard-checks.ll b/llvm/test/CodeGen/AArch64/cfguard-checks.ll
index 66ec4b6ed074..6dc94e220712 100644
--- a/llvm/test/CodeGen/AArch64/cfguard-checks.ll
+++ b/llvm/test/CodeGen/AArch64/cfguard-checks.ll
@@ -96,8 +96,8 @@ lpad:                                             ; preds = %entry
 	; CHECK:        blr x9
   ; CHECK-NEXT:   .Ltmp0:
 	; CHECK-NEXT:   blr x8
-  ; CHECK:       ; %invoke.cont
-  ; CHECK:       ; %lpad
+  ; CHECK:       // %invoke.cont
+  ; CHECK:       // %lpad
 }
 
 declare void @h()
diff --git a/llvm/test/CodeGen/AArch64/landingpad-ifcvt.ll b/llvm/test/CodeGen/AArch64/landingpad-ifcvt.ll
index 4437970e1660..a5497b1d8e14 100644
--- a/llvm/test/CodeGen/AArch64/landingpad-ifcvt.ll
+++ b/llvm/test/CodeGen/AArch64/landingpad-ifcvt.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s | FileCheck %s
 
 ; Make sure this doesn't crash (and the output is sane).
-; CHECK: ; %__except.ret
+; CHECK: // %__except.ret
 ; CHECK-NEXT: mov     x0, xzr
 
 target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/reloc-specifiers.mir b/llvm/test/CodeGen/AArch64/reloc-specifiers.mir
index 374a4759b4cd..3c56874e441a 100644
--- a/llvm/test/CodeGen/AArch64/reloc-specifiers.mir
+++ b/llvm/test/CodeGen/AArch64/reloc-specifiers.mir
@@ -11,11 +11,11 @@ body: |
   bb.0:
     ; CHECK-LABEL: bar
 
-    ; CHECK: movz    x0, #:abs_g1_s:.Lfoo$frame_escape_0 ; encoding: [0bAAA00000,A,0b101AAAAA,0xd2]
+    ; CHECK: movz    x0, #:abs_g1_s:.Lfoo$frame_escape_0 // encoding: [0bAAA00000,A,0b101AAAAA,0xd2]
     ; CHECK: fixup A - offset: 0, value: :abs_g1_s:.Lfoo$frame_escape_0, kind: fixup_aarch64_movw
     renamable $x0 = MOVZXi target-flags(aarch64-g1, aarch64-s) <mcsymbol .Lfoo$frame_escape_0>, 16
 
-    ; CHECK: movk    x0, #:abs_g0_nc:.Lfoo$frame_escape_0 ; encoding: [0bAAA00000,A,0b100AAAAA,0xf2]
+    ; CHECK: movk    x0, #:abs_g0_nc:.Lfoo$frame_escape_0 // encoding: [0bAAA00000,A,0b100AAAAA,0xf2]
     ; CHECK: fixup A - offset: 0, value: :abs_g0_nc:.Lfoo$frame_escape_0, kind: fixup_aarch64_movw
     renamable $x0 = MOVKXi $x0, target-flags(aarch64-g0, aarch64-nc) <mcsymbol .Lfoo$frame_escape_0>, 0
 ...
diff --git a/llvm/test/CodeGen/AArch64/seh_funclet_x1.ll b/llvm/test/CodeGen/AArch64/seh_funclet_x1.ll
index 1f524716be9a..7f5a0324f9c0 100644
--- a/llvm/test/CodeGen/AArch64/seh_funclet_x1.ll
+++ b/llvm/test/CodeGen/AArch64/seh_funclet_x1.ll
@@ -5,7 +5,7 @@
 
 ; CHECK:      ?dtor$3@?0?main@4HA":
 ; CHECK:      .seh_proc "?dtor$3@?0?main@4HA"
-; CHECK:      stp     x29, x30, [sp, #-16]!   ; 16-byte Folded Spill
+; CHECK:      stp     x29, x30, [sp, #-16]!   // 16-byte Folded Spill
 ; CHECK-NEXT: .seh_save_fplr_x 16
 ; CHECK-NEXT: .seh_endprologue
 ; CHECK-NEXT: mov     x29, x1
diff --git a/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll b/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
index f04a47cd1e44..789620a21dd6 100644
--- a/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
+++ b/llvm/test/CodeGen/AArch64/win64-no-uwtable.ll
@@ -13,11 +13,11 @@ define dso_local void @SEHfilter() nounwind "frame-pointer"="all" {
 ; CHECK-NEXT:  mov     x29, sp
 ; CHECK-NEXT:  bl      g
 ; CHECK-NEXT:  cbz     w19, .LBB0_2
-; CHECK-NEXT:  ; %bb.1:
+; CHECK-NEXT:  // %bb.1:
 ; CHECK-NEXT:  ldr     x19, [sp, #16]
 ; CHECK-NEXT:  ldp     x30, x29, [sp], #32
 ; CHECK-NEXT:  ret
-; CHECK-NEXT:  .LBB0_2:                                ; %if.end.i
+; CHECK-NEXT:  .LBB0_2:                                // %if.end.i
 ; CHECK-NEXT:  bl      f
 ; CHECK-NEXT:  brk     #0x1
   %1 = load i32, i32* undef, align 4
diff --git a/llvm/test/CodeGen/AArch64/windows-extern-weak.ll b/llvm/test/CodeGen/AArch64/windows-extern-weak.ll
index 18df2ddc5db4..dbd17e35f44a 100644
--- a/llvm/test/CodeGen/AArch64/windows-extern-weak.ll
+++ b/llvm/test/CodeGen/AArch64/windows-extern-weak.ll
@@ -10,7 +10,7 @@ define void @func() {
 ; CHECK-NEXT: adrp x8, .refptr.weakfunc
 ; CHECK-NEXT: ldr x8, [x8, :lo12:.refptr.weakfunc]
 ; CHECK-NEXT: cbz     x8, .LBB0_2
-; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: // %bb.1:
 ; CHECK-NEXT: blr     x8
 ; CHECK-NEXT: .LBB0_2:
 ; CHECK-NEXT: .seh_startepilogue
diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch-nobase.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch-nobase.ll
index bf1ebaa3d277..1552a554eb4e 100644
--- a/llvm/test/CodeGen/AArch64/wineh-try-catch-nobase.ll
+++ b/llvm/test/CodeGen/AArch64/wineh-try-catch-nobase.ll
@@ -6,16 +6,16 @@
 
 ; Check that we compute the address relative to fp.
 ; CHECK-LABEL: "?catch$2@?0??a@@YAXXZ@4HA":
-; CHECK:             stp     x29, x30, [sp, #-16]!   ; 16-byte Folded Spill
+; CHECK:             stp     x29, x30, [sp, #-16]!   // 16-byte Folded Spill
 ; CHECK-NEXT:        .seh_save_fplr_x 16
 ; CHECK-NEXT:        .seh_endprologue
-; CHECK-NEXT:        sub     x0, x29, #16            ; =16
+; CHECK-NEXT:        sub     x0, x29, #16            // =16
 ; CHECK-NEXT:        mov     x1, xzr
 ; CHECK-NEXT:        bl      "?bb@@YAXPEAHH@Z"
 ; CHECK-NEXT:        adrp    x0, .LBB0_1
 ; CHECK-NEXT:        add     x0, x0, .LBB0_1
 ; CHECK-NEXT:        .seh_startepilogue
-; CHECK-NEXT:        ldp     x29, x30, [sp], #16     ; 16-byte Folded Reload
+; CHECK-NEXT:        ldp     x29, x30, [sp], #16     // 16-byte Folded Reload
 ; CHECK-NEXT:        .seh_save_fplr_x 16
 ; CHECK-NEXT:        .seh_endepilogue
 ; CHECK-NEXT:        ret
diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll
index 8bf5aa33e24a..7de7d60b8cab 100644
--- a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll
+++ b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll
@@ -41,7 +41,7 @@
 ; CHECK-LABEL: .Ltmp0:
 ; CHECK:       bl      "?func2@@YAHXZ
 
-; CHECK:        [[CATCHRETDEST:.LBB0_[0-9]+]]:      ; %catchret.dest
+; CHECK:        [[CATCHRETDEST:.LBB0_[0-9]+]]:      // %catchret.dest
 
 ; Check the catch funclet.
 ; CHECK-LABEL: "?catch$2@?0??func@@YAHXZ@4HA":
@@ -74,14 +74,14 @@
 ; entry to func is encoded in cppxdata that is passed to __CxxFrameHandler3.  As
 ; computed above, this comes to -16.
 ; CHECK-LABEL:        "$cppxdata$?func@@YAHXZ":
-; CHECK-NEXT:         .word   429065506               ; MagicNumber
-; CHECK-NEXT:         .word   2                       ; MaxState
-; CHECK-NEXT:         .word   ("$stateUnwindMap$?func@@YAHXZ")@IMGREL ; UnwindMap
-; CHECK-NEXT:         .word   1                       ; NumTryBlocks
-; CHECK-NEXT:         .word   ("$tryMap$?func@@YAHXZ")@IMGREL ; TryBlockMap
-; CHECK-NEXT:         .word   4                       ; IPMapEntries
-; CHECK-NEXT:         .word   ("$ip2state$?func@@YAHXZ")@IMGREL ; IPToStateXData
-; CHECK-NEXT:         .word   -16                     ; UnwindHelp
+; CHECK-NEXT:         .word   429065506               // MagicNumber
+; CHECK-NEXT:         .word   2                       // MaxState
+; CHECK-NEXT:         .word   ("$stateUnwindMap$?func@@YAHXZ")@IMGREL // UnwindMap
+; CHECK-NEXT:         .word   1                       // NumTryBlocks
+; CHECK-NEXT:         .word   ("$tryMap$?func@@YAHXZ")@IMGREL // TryBlockMap
+; CHECK-NEXT:         .word   4                       // IPMapEntries
+; CHECK-NEXT:         .word   ("$ip2state$?func@@YAHXZ")@IMGREL // IPToStateXData
+; CHECK-NEXT:         .word   -16                     // UnwindHelp
 
 ; UNWIND: Function: ?func@@YAHXZ (0x0)
 ; UNWIND: Prologue [
diff --git a/llvm/test/MC/AArch64/coff-relocations.s b/llvm/test/MC/AArch64/coff-relocations.s
index 54706fd897e6..6433109d5634 100644
--- a/llvm/test/MC/AArch64/coff-relocations.s
+++ b/llvm/test/MC/AArch64/coff-relocations.s
@@ -1,41 +1,41 @@
-; RUN: llvm-mc -triple aarch64-windows -filetype obj -o %t.obj %s
-; RUN: llvm-readobj -r %t.obj | FileCheck %s
-; RUN: llvm-objdump -d %t.obj | FileCheck %s --check-prefix=DISASM
+// RUN: llvm-mc -triple aarch64-windows -filetype obj -o %t.obj %s
+// RUN: llvm-readobj -r %t.obj | FileCheck %s
+// RUN: llvm-objdump -d %t.obj | FileCheck %s --check-prefix=DISASM
 
-; IMAGE_REL_ARM64_ADDR32
+// IMAGE_REL_ARM64_ADDR32
 .Linfo_foo:
   .asciz "foo"
   .long foo
 
-; IMAGE_REL_ARM64_ADDR32NB
+// IMAGE_REL_ARM64_ADDR32NB
 .long func@IMGREL
 
-; IMAGE_REL_ARM64_ADDR64
+// IMAGE_REL_ARM64_ADDR64
 .globl struc
 struc:
   .quad arr
 
-; IMAGE_REL_ARM64_BRANCH26
+// IMAGE_REL_ARM64_BRANCH26
 b target
 
-; IMAGE_REL_ARM64_PAGEBASE_REL21
+// IMAGE_REL_ARM64_PAGEBASE_REL21
 adrp x0, foo
 
-; IMAGE_REL_ARM64_PAGEOFFSET_12A
+// IMAGE_REL_ARM64_PAGEOFFSET_12A
 add x0, x0, :lo12:foo
 
-; IMAGE_REL_ARM64_PAGEOFFSET_12L
+// IMAGE_REL_ARM64_PAGEOFFSET_12L
 ldr x0, [x0, :lo12:foo]
 
-; IMAGE_REL_ARM64_PAGEBASE_REL21, even if the symbol offset is known
+// IMAGE_REL_ARM64_PAGEBASE_REL21, even if the symbol offset is known
 adrp x0, bar
 bar:
 
-; IMAGE_REL_ARM64_SECREL
+// IMAGE_REL_ARM64_SECREL
 .secrel32 .Linfo_bar
 .Linfo_bar:
 
-; IMAGE_REL_ARM64_SECTION
+// IMAGE_REL_ARM64_SECTION
 .secidx func
 
 .align 2
@@ -45,55 +45,55 @@ add x0, x0, :lo12:foo + 0x12345
 ldrb w0, [x0, :lo12:foo + 0x12345]
 ldr x0, [x0, :lo12:foo + 0x12348]
 
-; IMAGE_REL_ARM64_SECREL_LOW12A
+// IMAGE_REL_ARM64_SECREL_LOW12A
 add x0, x0, :secrel_lo12:foo
-; IMAGE_REL_ARM64_SECREL_HIGH12A
+// IMAGE_REL_ARM64_SECREL_HIGH12A
 add x0, x0, :secrel_hi12:foo
-; IMAGE_REL_ARM64_SECREL_LOW12L
+// IMAGE_REL_ARM64_SECREL_LOW12L
 ldr x0, [x0, :secrel_lo12:foo]
 
-; IMAGE_REL_ARM64_REL21
+// IMAGE_REL_ARM64_REL21
 adr x0, foo + 0x12345
 
-; IMAGE_REL_ARM64_BRANCH19
+// IMAGE_REL_ARM64_BRANCH19
 bne target
 
-; IMAGE_REL_ARM64_BRANCH14
+// IMAGE_REL_ARM64_BRANCH14
 tbz x0, #0, target
 
-; CHECK: Format: COFF-ARM64
-; CHECK: Arch: aarch64
-; CHECK: AddressSize: 64bit
-; CHECK: Relocations [
-; CHECK:   Section (1) .text {
-; CHECK: 0x4 IMAGE_REL_ARM64_ADDR32 foo
-; CHECK: 0x8 IMAGE_REL_ARM64_ADDR32NB func
-; CHECK: 0xC IMAGE_REL_ARM64_ADDR64 arr
-; CHECK: 0x14 IMAGE_REL_ARM64_BRANCH26 target
-; CHECK: 0x18 IMAGE_REL_ARM64_PAGEBASE_REL21 foo
-; CHECK: 0x1C IMAGE_REL_ARM64_PAGEOFFSET_12A foo
-; CHECK: 0x20 IMAGE_REL_ARM64_PAGEOFFSET_12L foo
-; CHECK: 0x24 IMAGE_REL_ARM64_PAGEBASE_REL21 bar
-; CHECK: 0x28 IMAGE_REL_ARM64_SECREL .text
-; CHECK: 0x2C IMAGE_REL_ARM64_SECTION func
-; CHECK: 0x30 IMAGE_REL_ARM64_PAGEBASE_REL21 baz
-; CHECK: 0x34 IMAGE_REL_ARM64_PAGEOFFSET_12A foo
-; CHECK: 0x38 IMAGE_REL_ARM64_PAGEOFFSET_12L foo
-; CHECK: 0x3C IMAGE_REL_ARM64_PAGEOFFSET_12L foo
-; CHECK: 0x40 IMAGE_REL_ARM64_SECREL_LOW12A foo
-; CHECK: 0x44 IMAGE_REL_ARM64_SECREL_HIGH12A foo
-; CHECK: 0x48 IMAGE_REL_ARM64_SECREL_LOW12L foo
-; CHECK: 0x4C IMAGE_REL_ARM64_REL21 foo
-; CHECK: 0x50 IMAGE_REL_ARM64_BRANCH19 target
-; CHECK: 0x54 IMAGE_REL_ARM64_BRANCH14 target
-; CHECK:   }
-; CHECK: ]
-
-; DISASM: 30:       20 1a 09 b0     adrp    x0, 0x12345000
-; DISASM: 34:       00 14 0d 91     add     x0, x0, #837
-; DISASM: 38:       00 14 4d 39     ldrb    w0, [x0, #837]
-; DISASM: 3c:       00 a4 41 f9     ldr     x0, [x0, #840]
-; DISASM: 40:       00 00 00 91     add     x0, x0, #0
-; DISASM: 44:       00 00 40 91     add     x0, x0, #0, lsl #12
-; DISASM: 48:       00 00 40 f9     ldr     x0, [x0]
-; DISASM: 4c:       20 1a 09 30     adr     x0, #74565
+// CHECK: Format: COFF-ARM64
+// CHECK: Arch: aarch64
+// CHECK: AddressSize: 64bit
+// CHECK: Relocations [
+// CHECK:   Section (1) .text {
+// CHECK: 0x4 IMAGE_REL_ARM64_ADDR32 foo
+// CHECK: 0x8 IMAGE_REL_ARM64_ADDR32NB func
+// CHECK: 0xC IMAGE_REL_ARM64_ADDR64 arr
+// CHECK: 0x14 IMAGE_REL_ARM64_BRANCH26 target
+// CHECK: 0x18 IMAGE_REL_ARM64_PAGEBASE_REL21 foo
+// CHECK: 0x1C IMAGE_REL_ARM64_PAGEOFFSET_12A foo
+// CHECK: 0x20 IMAGE_REL_ARM64_PAGEOFFSET_12L foo
+// CHECK: 0x24 IMAGE_REL_ARM64_PAGEBASE_REL21 bar
+// CHECK: 0x28 IMAGE_REL_ARM64_SECREL .text
+// CHECK: 0x2C IMAGE_REL_ARM64_SECTION func
+// CHECK: 0x30 IMAGE_REL_ARM64_PAGEBASE_REL21 baz
+// CHECK: 0x34 IMAGE_REL_ARM64_PAGEOFFSET_12A foo
+// CHECK: 0x38 IMAGE_REL_ARM64_PAGEOFFSET_12L foo
+// CHECK: 0x3C IMAGE_REL_ARM64_PAGEOFFSET_12L foo
+// CHECK: 0x40 IMAGE_REL_ARM64_SECREL_LOW12A foo
+// CHECK: 0x44 IMAGE_REL_ARM64_SECREL_HIGH12A foo
+// CHECK: 0x48 IMAGE_REL_ARM64_SECREL_LOW12L foo
+// CHECK: 0x4C IMAGE_REL_ARM64_REL21 foo
+// CHECK: 0x50 IMAGE_REL_ARM64_BRANCH19 target
+// CHECK: 0x54 IMAGE_REL_ARM64_BRANCH14 target
+// CHECK:   }
+// CHECK: ]
+
+// DISASM: 30:       20 1a 09 b0     adrp    x0, 0x12345000
+// DISASM: 34:       00 14 0d 91     add     x0, x0, #837
+// DISASM: 38:       00 14 4d 39     ldrb    w0, [x0, #837]
+// DISASM: 3c:       00 a4 41 f9     ldr     x0, [x0, #840]
+// DISASM: 40:       00 00 00 91     add     x0, x0, #0
+// DISASM: 44:       00 00 40 91     add     x0, x0, #0, lsl #12
+// DISASM: 48:       00 00 40 f9     ldr     x0, [x0]
+// DISASM: 4c:       20 1a 09 30     adr     x0, #74565
diff --git a/llvm/test/MC/AArch64/coff-gnu.s b/llvm/test/MC/AArch64/coff-separator.s
similarity index 74%
rename from llvm/test/MC/AArch64/coff-gnu.s
rename to llvm/test/MC/AArch64/coff-separator.s
index df0dc7d33cfb..7535cf0571b9 100644
--- a/llvm/test/MC/AArch64/coff-gnu.s
+++ b/llvm/test/MC/AArch64/coff-separator.s
@@ -1,5 +1,7 @@
 // RUN: llvm-mc -triple aarch64-windows-gnu -filetype obj -o %t.obj %s
 // RUN: llvm-objdump -d %t.obj | FileCheck %s
+// RUN: llvm-mc -triple aarch64-windows-msvc -filetype obj -o %t.obj %s
+// RUN: llvm-objdump -d %t.obj | FileCheck %s
 
 func:
 // Check that the nop instruction after the semicolon also is handled

From 5ae2b9726f27b571de71542ef4d59ebceee8aca7 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Thu, 4 Feb 2021 11:05:35 -0500
Subject: [PATCH 104/244] Recommit of a2fdf9d4d734732a6fa9288f1ffdf12bf8618123.

- The failures are all in cc1-based tests due to the missing `-aux-triple` option,
which is always prepared by the driver in CUDA/HIP compilation.
- Add extra check on the missing aux-targetinfo to prevent crashing.

[hip][cuda] Enable extended lambda support on Windows.

- On Windows, extended lambda has extra issues because the numbering
schemes differ between the host compilation (Microsoft C++ ABI)
and the device compilation (Itanium C++ ABI). An additional device-side
lambda number is required per lambda for the host compilation to
correctly mangle the device-side lambda name.
- A hybrid numbering context `MSHIPNumberingContext` is introduced to
number a lambda for both host- and device-compilations.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D69322

This reverts commit 4874ff02417916cc9ff994b34abcb5e563056546.

(cherry picked from commit 01bf529db2cf465b029e29e537807576bfcbc452)
---
 clang/include/clang/AST/ASTContext.h          |  3 ++
 clang/include/clang/AST/DeclCXX.h             |  6 ++++
 clang/include/clang/AST/Mangle.h              |  3 ++
 .../clang/AST/MangleNumberingContext.h        |  5 +++
 clang/include/clang/Sema/Sema.h               |  2 +-
 clang/lib/AST/ASTImporter.cpp                 |  2 ++
 clang/lib/AST/CXXABI.h                        |  5 ++-
 clang/lib/AST/DeclCXX.cpp                     | 14 ++++++++
 clang/lib/AST/ItaniumCXXABI.cpp               |  6 ++++
 clang/lib/AST/ItaniumMangle.cpp               | 16 ++++++++-
 clang/lib/AST/MicrosoftCXXABI.cpp             | 33 +++++++++++++++++--
 clang/lib/CodeGen/CGCUDANV.cpp                |  8 +++++
 clang/lib/Sema/SemaLambda.cpp                 | 10 +++---
 clang/lib/Sema/TreeTransform.h                |  7 ++--
 clang/lib/Serialization/ASTReaderDecl.cpp     |  1 +
 clang/lib/Serialization/ASTWriter.cpp         |  1 +
 clang/test/CodeGenCUDA/unnamed-types.cu       | 27 +++++++++++++--
 17 files changed, 134 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ce47d54e44b0..ae69a68608b7 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -538,6 +538,9 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// need them (like static local vars).
   llvm::MapVector<const NamedDecl *, unsigned> MangleNumbers;
   llvm::MapVector<const VarDecl *, unsigned> StaticLocalNumbers;
+  /// Maps a lambda class to its device-side mangling number, if present.
+  mutable llvm::DenseMap<const CXXRecordDecl *, unsigned>
+      DeviceLambdaManglingNumbers;
 
   /// Mapping that stores parameterIndex values for ParmVarDecls when
   /// that value exceeds the bitfield size of ParmVarDeclBits.ParameterIndex.
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index e32101bb2276..89006b1cfa7f 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -1735,6 +1735,12 @@ class CXXRecordDecl : public RecordDecl {
     getLambdaData().HasKnownInternalLinkage = HasKnownInternalLinkage;
   }
 
+  /// Set the device side mangling number.
+  void setDeviceLambdaManglingNumber(unsigned Num) const;
+
+  /// Retrieve the device side mangling number.
+  unsigned getDeviceLambdaManglingNumber() const;
+
   /// Returns the inheritance model used for this record.
   MSInheritanceModel getMSInheritanceModel() const;
 
diff --git a/clang/include/clang/AST/Mangle.h b/clang/include/clang/AST/Mangle.h
index 0e8d6dd53d8a..7b6495d85eb6 100644
--- a/clang/include/clang/AST/Mangle.h
+++ b/clang/include/clang/AST/Mangle.h
@@ -96,6 +96,9 @@ class MangleContext {
   virtual bool shouldMangleCXXName(const NamedDecl *D) = 0;
   virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0;
 
+  virtual bool isDeviceMangleContext() const { return false; }
+  virtual void setDeviceMangleContext(bool) {}
+
   // FIXME: consider replacing raw_ostream & with something like SmallString &.
   void mangleName(GlobalDecl GD, raw_ostream &);
   virtual void mangleCXXName(GlobalDecl GD, raw_ostream &) = 0;
diff --git a/clang/include/clang/AST/MangleNumberingContext.h b/clang/include/clang/AST/MangleNumberingContext.h
index f1ca6a05dbaf..eb33759682d6 100644
--- a/clang/include/clang/AST/MangleNumberingContext.h
+++ b/clang/include/clang/AST/MangleNumberingContext.h
@@ -52,6 +52,11 @@ class MangleNumberingContext {
   /// this context.
   virtual unsigned getManglingNumber(const TagDecl *TD,
                                      unsigned MSLocalManglingNumber) = 0;
+
+  /// Retrieve the mangling number of a new lambda expression with the
+  /// given call operator within the device context. No device number is
+  /// assigned if no device numbering context is associated.
+  virtual unsigned getDeviceManglingNumber(const CXXMethodDecl *) { return 0; }
 };
 
 } // end namespace clang
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 42814f6ba8f6..2530a2776373 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6558,7 +6558,7 @@ class Sema final {
   /// Number lambda for linkage purposes if necessary.
   void handleLambdaNumbering(
       CXXRecordDecl *Class, CXXMethodDecl *Method,
-      Optional<std::tuple<unsigned, bool, Decl *>> Mangling = None);
+      Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling = None);
 
   /// Endow the lambda scope info with the relevant properties.
   void buildLambdaScope(sema::LambdaScopeInfo *LSI,
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 085c50c0667b..0d723fbbcd8c 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -2848,6 +2848,8 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
         return CDeclOrErr.takeError();
       D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr,
                                DCXX->hasKnownLambdaInternalLinkage());
+      D2CXX->setDeviceLambdaManglingNumber(
+          DCXX->getDeviceLambdaManglingNumber());
    } else if (DCXX->isInjectedClassName()) {
       // We have to be careful to do a similar dance to the one in
       // Sema::ActOnStartCXXMemberDeclarations
diff --git a/clang/lib/AST/CXXABI.h b/clang/lib/AST/CXXABI.h
index 31cb36918726..ca9424bcb7a4 100644
--- a/clang/lib/AST/CXXABI.h
+++ b/clang/lib/AST/CXXABI.h
@@ -22,8 +22,9 @@ class ASTContext;
 class CXXConstructorDecl;
 class DeclaratorDecl;
 class Expr;
-class MemberPointerType;
+class MangleContext;
 class MangleNumberingContext;
+class MemberPointerType;
 
 /// Implements C++ ABI-specific semantic analysis functions.
 class CXXABI {
@@ -75,6 +76,8 @@ class CXXABI {
 /// Creates an instance of a C++ ABI class.
 CXXABI *CreateItaniumCXXABI(ASTContext &Ctx);
 CXXABI *CreateMicrosoftCXXABI(ASTContext &Ctx);
+std::unique_ptr<MangleNumberingContext>
+createItaniumNumberingContext(MangleContext *);
 }
 
 #endif
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 0368ada0b81c..0375f9b4432e 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -1593,6 +1593,20 @@ Decl *CXXRecordDecl::getLambdaContextDecl() const {
   return getLambdaData().ContextDecl.get(Source);
 }
 
+void CXXRecordDecl::setDeviceLambdaManglingNumber(unsigned Num) const {
+  assert(isLambda() && "Not a lambda closure type!");
+  if (Num)
+    getASTContext().DeviceLambdaManglingNumbers[this] = Num;
+}
+
+unsigned CXXRecordDecl::getDeviceLambdaManglingNumber() const {
+  assert(isLambda() && "Not a lambda closure type!");
+  auto I = getASTContext().DeviceLambdaManglingNumbers.find(this);
+  if (I != getASTContext().DeviceLambdaManglingNumbers.end())
+    return I->second;
+  return 0;
+}
+
 static CanQualType GetConversionType(ASTContext &Context, NamedDecl *Conv) {
   QualType T =
       cast<CXXConversionDecl>(Conv->getUnderlyingDecl()->getAsFunction())
diff --git a/clang/lib/AST/ItaniumCXXABI.cpp b/clang/lib/AST/ItaniumCXXABI.cpp
index 069add8464ae..be10258a2d77 100644
--- a/clang/lib/AST/ItaniumCXXABI.cpp
+++ b/clang/lib/AST/ItaniumCXXABI.cpp
@@ -258,3 +258,9 @@ class ItaniumCXXABI : public CXXABI {
 CXXABI *clang::CreateItaniumCXXABI(ASTContext &Ctx) {
   return new ItaniumCXXABI(Ctx);
 }
+
+std::unique_ptr<MangleNumberingContext>
+clang::createItaniumNumberingContext(MangleContext *Mangler) {
+  return std::make_unique<ItaniumNumberingContext>(
+      cast<ItaniumMangleContext>(Mangler));
+}
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 4420f6a2c1c3..5cad84a96845 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -125,6 +125,8 @@ class ItaniumMangleContextImpl : public ItaniumMangleContext {
   llvm::DenseMap<DiscriminatorKeyTy, unsigned> Discriminator;
   llvm::DenseMap<const NamedDecl*, unsigned> Uniquifier;
 
+  bool IsDevCtx = false;
+
 public:
   explicit ItaniumMangleContextImpl(ASTContext &Context,
                                     DiagnosticsEngine &Diags)
@@ -137,6 +139,10 @@ class ItaniumMangleContextImpl : public ItaniumMangleContext {
   bool shouldMangleStringLiteral(const StringLiteral *) override {
     return false;
   }
+
+  bool isDeviceMangleContext() const override { return IsDevCtx; }
+  void setDeviceMangleContext(bool IsDev) override { IsDevCtx = IsDev; }
+
   void mangleCXXName(GlobalDecl GD, raw_ostream &) override;
   void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk,
                    raw_ostream &) override;
@@ -1846,7 +1852,15 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) {
   // (in lexical order) with that same <lambda-sig> and context.
   //
   // The AST keeps track of the number for us.
-  unsigned Number = Lambda->getLambdaManglingNumber();
+  //
+  // In CUDA/HIP, to ensure the consistent lambda numbering between the device-
+  // and host-side compilations, an extra device mangle context may be created
+  // if the host-side CXX ABI has different numbering for lambda. In such case,
+  // if the mangle context is that device-side one, use the device-side lambda
+  // mangling number for this lambda.
+  unsigned Number = Context.isDeviceMangleContext()
+                        ? Lambda->getDeviceLambdaManglingNumber()
+                        : Lambda->getLambdaManglingNumber();
   assert(Number > 0 && "Lambda should be mangled as an unnamed class");
   if (Number > 1)
     mangleNumber(Number - 2);
diff --git a/clang/lib/AST/MicrosoftCXXABI.cpp b/clang/lib/AST/MicrosoftCXXABI.cpp
index f9f9fe985b6f..166aa3b3bd60 100644
--- a/clang/lib/AST/MicrosoftCXXABI.cpp
+++ b/clang/lib/AST/MicrosoftCXXABI.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/Mangle.h"
 #include "clang/AST/MangleNumberingContext.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/Type.h"
@@ -64,6 +65,19 @@ class MicrosoftNumberingContext : public MangleNumberingContext {
   }
 };
 
+class MSHIPNumberingContext : public MicrosoftNumberingContext {
+  std::unique_ptr<MangleNumberingContext> DeviceCtx;
+
+public:
+  MSHIPNumberingContext(MangleContext *DeviceMangler) {
+    DeviceCtx = createItaniumNumberingContext(DeviceMangler);
+  }
+
+  unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
+    return DeviceCtx->getManglingNumber(CallOperator);
+  }
+};
+
 class MicrosoftCXXABI : public CXXABI {
   ASTContext &Context;
   llvm::SmallDenseMap<CXXRecordDecl *, CXXConstructorDecl *> RecordToCopyCtor;
@@ -73,8 +87,20 @@ class MicrosoftCXXABI : public CXXABI {
   llvm::SmallDenseMap<TagDecl *, TypedefNameDecl *>
       UnnamedTagDeclToTypedefNameDecl;
 
+  // MangleContext for device numbering context, which is based on Itanium C++
+  // ABI.
+  std::unique_ptr<MangleContext> DeviceMangler;
+
 public:
-  MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) { }
+  MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) {
+    if (Context.getLangOpts().CUDA && Context.getAuxTargetInfo()) {
+      assert(Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+             Context.getAuxTargetInfo()->getCXXABI().isItaniumFamily() &&
+             "Unexpected combination of C++ ABIs.");
+      DeviceMangler.reset(
+          Context.createMangleContext(Context.getAuxTargetInfo()));
+    }
+  }
 
   MemberPointerInfo
   getMemberPointerInfo(const MemberPointerType *MPT) const override;
@@ -133,6 +159,10 @@ class MicrosoftCXXABI : public CXXABI {
 
   std::unique_ptr<MangleNumberingContext>
   createMangleNumberingContext() const override {
+    if (Context.getLangOpts().CUDA && Context.getAuxTargetInfo()) {
+      assert(DeviceMangler && "Missing device mangler");
+      return std::make_unique<MSHIPNumberingContext>(DeviceMangler.get());
+    }
     return std::make_unique<MicrosoftNumberingContext>();
   }
 };
@@ -266,4 +296,3 @@ CXXABI::MemberPointerInfo MicrosoftCXXABI::getMemberPointerInfo(
 CXXABI *clang::CreateMicrosoftCXXABI(ASTContext &Ctx) {
   return new MicrosoftCXXABI(Ctx);
 }
-
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 33a2d6f4483e..e03631a7243a 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -184,6 +184,14 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
   CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
   VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
   VoidPtrPtrTy = VoidPtrTy->getPointerTo();
+  if (CGM.getContext().getAuxTargetInfo()) {
+    // If the host and device have different C++ ABIs, mark it as the device
+    // mangle context so that the mangling needs to retrieve the additional
+    // device lambda mangling number instead of the regular host one.
+    DeviceMC->setDeviceMangleContext(
+        CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
+        CGM.getContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily());
+  }
 }
 
 llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index af61c82c2002..c1c6a4bf5c68 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -432,15 +432,16 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
 
 void Sema::handleLambdaNumbering(
     CXXRecordDecl *Class, CXXMethodDecl *Method,
-    Optional<std::tuple<unsigned, bool, Decl *>> Mangling) {
+    Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling) {
   if (Mangling) {
-    unsigned ManglingNumber;
     bool HasKnownInternalLinkage;
+    unsigned ManglingNumber, DeviceManglingNumber;
     Decl *ManglingContextDecl;
-    std::tie(ManglingNumber, HasKnownInternalLinkage, ManglingContextDecl) =
-        Mangling.getValue();
+    std::tie(HasKnownInternalLinkage, ManglingNumber, DeviceManglingNumber,
+             ManglingContextDecl) = Mangling.getValue();
     Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
                              HasKnownInternalLinkage);
+    Class->setDeviceLambdaManglingNumber(DeviceManglingNumber);
     return;
   }
 
@@ -476,6 +477,7 @@ void Sema::handleLambdaNumbering(
     unsigned ManglingNumber = MCtx->getManglingNumber(Method);
     Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
                              HasKnownInternalLinkage);
+    Class->setDeviceLambdaManglingNumber(MCtx->getDeviceManglingNumber(Method));
   }
 }
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 0a596e50658b..3c68f9458e58 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12504,10 +12504,11 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
                                         E->getCaptureDefault());
   getDerived().transformedLocalDecl(OldClass, {Class});
 
-  Optional<std::tuple<unsigned, bool, Decl *>> Mangling;
+  Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling;
   if (getDerived().ReplacingOriginal())
-    Mangling = std::make_tuple(OldClass->getLambdaManglingNumber(),
-                               OldClass->hasKnownLambdaInternalLinkage(),
+    Mangling = std::make_tuple(OldClass->hasKnownLambdaInternalLinkage(),
+                               OldClass->getLambdaManglingNumber(),
+                               OldClass->getDeviceLambdaManglingNumber(),
                                OldClass->getLambdaContextDecl());
 
   // Build the call operator.
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 6bfb9bd783b5..18ab4666a7d8 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1748,6 +1748,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
     Lambda.NumExplicitCaptures = Record.readInt();
     Lambda.HasKnownInternalLinkage = Record.readInt();
     Lambda.ManglingNumber = Record.readInt();
+    D->setDeviceLambdaManglingNumber(Record.readInt());
     Lambda.ContextDecl = readDeclID();
     Lambda.Captures = (Capture *)Reader.getContext().Allocate(
         sizeof(Capture) * Lambda.NumCaptures);
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 6bfa7b0e7d6d..40900af6f9e0 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -5667,6 +5667,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
     Record->push_back(Lambda.NumExplicitCaptures);
     Record->push_back(Lambda.HasKnownInternalLinkage);
     Record->push_back(Lambda.ManglingNumber);
+    Record->push_back(D->getDeviceLambdaManglingNumber());
     AddDeclRef(D->getLambdaContextDecl());
     AddTypeSourceInfo(Lambda.MethodTyInfo);
     for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
diff --git a/clang/test/CodeGenCUDA/unnamed-types.cu b/clang/test/CodeGenCUDA/unnamed-types.cu
index 59bfa6d7a18f..f598117d969d 100644
--- a/clang/test/CodeGenCUDA/unnamed-types.cu
+++ b/clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@ __device__ float d1(float x) {
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template <typename F>
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template <typename F0, typename F1, typename F2>
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
     *p = 1.f;
@@ -29,11 +44,17 @@ void f0(float *p) {
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-    k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+    k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+              [] __device__ (float x) { return x + 4.f; },
+              [] __device__ (float x, float y) { return x * y; },
+              [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V<lambda_1>@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV<lambda_1>@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V<lambda_2>@?0??f1@@YAXPEAM@Z@V<lambda_3>@?0??2@YAX0@Z@V<lambda_4>@?0??2@YAX0@Z@@@YAXPEAMV<lambda_2>@?0??f1@@YAX0@Z@V<lambda_3>@?0??1@YAX0@Z@V<lambda_4>@?0??1@YAX0@Z@@Z{{.*}}@1

From 3979099a9b71dbf5e4d67fb3a5ae50c7afe707fa Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Tue, 9 Feb 2021 09:28:06 -0800
Subject: [PATCH 105/244] [RISCV] Remove SRO* and SLO* instructions from
 bitmanip.

As of the current draft, these are no longer being considered
for the bitmanip spec. It wasn't clear which sub-extension they
belonged to in the 0.93 spec.

So remove them. They can always be added back if something changes.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D96157

(cherry picked from commit fd5adae02cafe388673d3b3f92ef791af3c73cfe)
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp |  87 ----
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h   |   3 -
 llvm/lib/Target/RISCV/RISCVInstrInfoB.td    |  67 ---
 llvm/test/CodeGen/RISCV/rv32Zbp.ll          | 504 --------------------
 llvm/test/CodeGen/RISCV/rv64Zbp.ll          | 306 ------------
 llvm/test/MC/RISCV/rv32zbp-invalid.s        |  18 -
 llvm/test/MC/RISCV/rv32zbp-valid.s          |  12 -
 llvm/test/MC/RISCV/rv64zbp-invalid.s        |  14 -
 llvm/test/MC/RISCV/rv64zbp-valid.s          |  12 -
 9 files changed, 1023 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 2121cc38f661..43bf16c53a62 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -826,93 +826,6 @@ bool RISCVDAGToDAGISel::MatchSRLIW(SDNode *N) const {
   return (Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff;
 }
 
-// Check that it is a SLOI (Shift Left Ones Immediate). A PatFrag has already
-// determined it has the right structure:
-//
-//  (OR (SHL RS1, VC2), VC1)
-//
-// Check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-//  VC1 == maskTrailingOnes(VC2)
-//
-bool RISCVDAGToDAGISel::MatchSLOI(SDNode *N) const {
-  assert(N->getOpcode() == ISD::OR);
-  assert(N->getOperand(0).getOpcode() == ISD::SHL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  SDValue Shl = N->getOperand(0);
-  if (Subtarget->is64Bit()) {
-    uint64_t VC1 = N->getConstantOperandVal(1);
-    uint64_t VC2 = Shl.getConstantOperandVal(1);
-    return VC1 == maskTrailingOnes<uint64_t>(VC2);
-  }
-
-  uint32_t VC1 = N->getConstantOperandVal(1);
-  uint32_t VC2 = Shl.getConstantOperandVal(1);
-  return VC1 == maskTrailingOnes<uint32_t>(VC2);
-}
-
-// Check that it is a SROI (Shift Right Ones Immediate). A PatFrag has already
-// determined it has the right structure:
-//
-//  (OR (SRL RS1, VC2), VC1)
-//
-// Check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-//  VC1 == maskLeadingOnes(VC2)
-//
-bool RISCVDAGToDAGISel::MatchSROI(SDNode *N) const {
-  assert(N->getOpcode() == ISD::OR);
-  assert(N->getOperand(0).getOpcode() == ISD::SRL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  SDValue Srl = N->getOperand(0);
-  if (Subtarget->is64Bit()) {
-    uint64_t VC1 = N->getConstantOperandVal(1);
-    uint64_t VC2 = Srl.getConstantOperandVal(1);
-    return VC1 == maskLeadingOnes<uint64_t>(VC2);
-  }
-
-  uint32_t VC1 = N->getConstantOperandVal(1);
-  uint32_t VC2 = Srl.getConstantOperandVal(1);
-  return VC1 == maskLeadingOnes<uint32_t>(VC2);
-}
-
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64). A PatFrag
-// has already determined it has the right structure:
-//
-//  (OR (SRL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-//  VC2 < 32
-//  VC1 == maskTrailingZeros<uint64_t>(32 - VC2)
-//
-bool RISCVDAGToDAGISel::MatchSROIW(SDNode *N) const {
-  assert(N->getOpcode() == ISD::OR);
-  assert(N->getOperand(0).getOpcode() == ISD::SRL);
-  assert(isa<ConstantSDNode>(N->getOperand(1)));
-  assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
-  // The IsRV64 predicate is checked after PatFrag predicates so we can get
-  // here even on RV32.
-  if (!Subtarget->is64Bit())
-    return false;
-
-  SDValue Srl = N->getOperand(0);
-  uint64_t VC1 = N->getConstantOperandVal(1);
-  uint64_t VC2 = Srl.getConstantOperandVal(1);
-
-  // Immediate range should be enforced by uimm5 predicate.
-  assert(VC2 < 32 && "Unexpected immediate");
-  return VC1 == maskTrailingZeros<uint64_t>(32 - VC2);
-}
-
 // Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
 // on RV64).
 // SLLIUW is the same as SLLI except for the fact that it clears the bits
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 0c58c5379e13..6099586d049d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -46,9 +46,6 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   bool SelectAddrFI(SDValue Addr, SDValue &Base);
 
   bool MatchSRLIW(SDNode *N) const;
-  bool MatchSLOI(SDNode *N) const;
-  bool MatchSROI(SDNode *N) const;
-  bool MatchSROIW(SDNode *N) const;
   bool MatchSLLIUW(SDNode *N) const;
 
   bool selectVLOp(SDValue N, SDValue &VL);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 1bc288b5177c..7888ac7bac8e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -45,25 +45,6 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
   }];
 }
 
-
-// Check that it is a SLOI (Shift Left Ones Immediate).
-def SLOIPat : PatFrag<(ops node:$A, node:$B),
-                      (or (shl node:$A, node:$B), imm), [{
-  return MatchSLOI(N);
-}]>;
-
-// Check that it is a SROI (Shift Right Ones Immediate).
-def SROIPat : PatFrag<(ops node:$A, node:$B),
-                      (or (srl node:$A, node:$B), imm), [{
-  return MatchSROI(N);
-}]>;
-
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
-def SROIWPat : PatFrag<(ops node:$A, node:$B),
-                       (or (srl node:$A, node:$B), imm), [{
-  return MatchSROIW(N);
-}]>;
-
 // Checks if this mask has a single 0 bit and cannot be used with ANDI.
 def BCLRMask : ImmLeaf<XLenVT, [{
   if (Subtarget->is64Bit())
@@ -210,11 +191,6 @@ def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">, Sched<[]>;
 def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[]>;
 } // Predicates = [HasStdExtZba]
 
-let Predicates = [HasStdExtZbp] in {
-def SLO  : ALU_rr<0b0010000, 0b001, "slo">, Sched<[]>;
-def SRO  : ALU_rr<0b0010000, 0b101, "sro">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
 let Predicates = [HasStdExtZbbOrZbp] in {
 def ROL   : ALU_rr<0b0110000, 0b001, "rol">, Sched<[]>;
 def ROR   : ALU_rr<0b0110000, 0b101, "ror">, Sched<[]>;
@@ -238,11 +214,6 @@ def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>;
 def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
 } // Predicates = [HasStdExtZbp]
 
-let Predicates = [HasStdExtZbp] in {
-def SLOI : RVBShift_ri<0b00100, 0b001, OPC_OP_IMM, "sloi">, Sched<[]>;
-def SROI : RVBShift_ri<0b00100, 0b101, OPC_OP_IMM, "sroi">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
 let Predicates = [HasStdExtZbbOrZbp] in
 def RORI  : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[]>;
 
@@ -369,11 +340,6 @@ def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, Sched<[]>;
 def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[]>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SLOW   : ALUW_rr<0b0010000, 0b001, "slow">, Sched<[]>;
-def SROW   : ALUW_rr<0b0010000, 0b101, "srow">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
 def ROLW  : ALUW_rr<0b0110000, 0b001, "rolw">, Sched<[]>;
 def RORW  : ALUW_rr<0b0110000, 0b101, "rorw">, Sched<[]>;
@@ -395,11 +361,6 @@ let Predicates = [HasStdExtZbp, IsRV64] in {
 def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SLOIW  : RVBShiftW_ri<0b0010000, 0b001, OPC_OP_IMM_32, "sloiw">, Sched<[]>;
-def SROIW  : RVBShiftW_ri<0b0010000, 0b101, OPC_OP_IMM_32, "sroiw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
 def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[]>;
 
@@ -673,13 +634,6 @@ def : Pat<(or  GPR:$rs1, (not GPR:$rs2)), (ORN  GPR:$rs1, GPR:$rs2)>;
 def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZbbOrZbp]
 
-let Predicates = [HasStdExtZbp] in {
-def : Pat<(not (shiftop<shl> (not GPR:$rs1), GPR:$rs2)),
-          (SLO GPR:$rs1, GPR:$rs2)>;
-def : Pat<(not (shiftop<srl> (not GPR:$rs1), GPR:$rs2)),
-          (SRO GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp]
-
 let Predicates = [HasStdExtZbbOrZbp] in {
 def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
 def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
@@ -710,13 +664,6 @@ def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
           (BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
 }
 
-let Predicates = [HasStdExtZbp] in {
-def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
-          (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
-def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
-          (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
-} // Predicates = [HasStdExtZbp]
-
 // There's no encoding for roli in the the 'B' extension as it can be
 // implemented with rori by negating the immediate.
 let Predicates = [HasStdExtZbbOrZbp] in {
@@ -936,13 +883,6 @@ def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 3)), GPR:$rs2),
           (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(not (shiftopw<riscv_sllw> (not GPR:$rs1), GPR:$rs2)),
-          (SLOW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(not (shiftopw<riscv_srlw> (not GPR:$rs1), GPR:$rs2)),
-          (SROW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
 def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2),
           (ROLW GPR:$rs1, GPR:$rs2)>;
@@ -982,13 +922,6 @@ def : Pat<(xor (assertsexti32 GPR:$rs1), BSETINVWMask:$mask),
 
 } // Predicates = [HasStdExtZbs, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(sext_inreg (SLOIPat GPR:$rs1, uimm5:$shamt), i32),
-          (SLOIW GPR:$rs1, uimm5:$shamt)>;
-def : Pat<(SROIWPat GPR:$rs1, uimm5:$shamt),
-          (SROIW GPR:$rs1, uimm5:$shamt)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
 let Predicates = [HasStdExtZbp, IsRV64] in {
 def : Pat<(riscv_rorw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
 def : Pat<(riscv_rolw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
diff --git a/llvm/test/CodeGen/RISCV/rv32Zbp.ll b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
index de315dfb2d5a..ec1720337dda 100644
--- a/llvm/test/CodeGen/RISCV/rv32Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbp.ll
@@ -6,510 +6,6 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32IBP
 
-define i32 @slo_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: slo_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    sll a0, a0, a1
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: slo_i32:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    slo a0, a0, a1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: slo_i32:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    slo a0, a0, a1
-; RV32IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %shl = shl i32 %neg, %b
-  %neg1 = xor i32 %shl, -1
-  ret i32 %neg1
-}
-
-define i32 @slo_i32_mask(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: slo_i32_mask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    sll a0, a0, a1
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: slo_i32_mask:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    slo a0, a0, a1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: slo_i32_mask:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    slo a0, a0, a1
-; RV32IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %and = and i32 %b, 31
-  %shl = shl i32 %neg, %and
-  %neg1 = xor i32 %shl, -1
-  ret i32 %neg1
-}
-
-; As we are not matching directly i64 code patterns on RV32 some i64 patterns
-; don't have yet any matching bit manipulation instructions on RV32.
-; This test is presented here in case future expansions of the experimental-b
-; extension introduce instructions suitable for this pattern.
-
-define i64 @slo_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: slo_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi a3, a2, -32
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    bltz a3, .LBB2_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a2, zero
-; RV32I-NEXT:    sll a1, a0, a3
-; RV32I-NEXT:    j .LBB2_3
-; RV32I-NEXT:  .LBB2_2:
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    sll a1, a1, a2
-; RV32I-NEXT:    addi a3, zero, 31
-; RV32I-NEXT:    sub a3, a3, a2
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    srl a3, a4, a3
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    sll a2, a0, a2
-; RV32I-NEXT:  .LBB2_3:
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    not a0, a2
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: slo_i64:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    sll a1, a1, a2
-; RV32IB-NEXT:    addi a3, zero, 31
-; RV32IB-NEXT:    sub a3, a3, a2
-; RV32IB-NEXT:    srli a4, a0, 1
-; RV32IB-NEXT:    srl a3, a4, a3
-; RV32IB-NEXT:    or a1, a1, a3
-; RV32IB-NEXT:    addi a3, a2, -32
-; RV32IB-NEXT:    sll a4, a0, a3
-; RV32IB-NEXT:    slti a5, a3, 0
-; RV32IB-NEXT:    cmov a1, a5, a1, a4
-; RV32IB-NEXT:    sll a0, a0, a2
-; RV32IB-NEXT:    srai a2, a3, 31
-; RV32IB-NEXT:    and a0, a2, a0
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: slo_i64:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    addi a3, a2, -32
-; RV32IBP-NEXT:    not a0, a0
-; RV32IBP-NEXT:    bltz a3, .LBB2_2
-; RV32IBP-NEXT:  # %bb.1:
-; RV32IBP-NEXT:    mv a2, zero
-; RV32IBP-NEXT:    sll a1, a0, a3
-; RV32IBP-NEXT:    j .LBB2_3
-; RV32IBP-NEXT:  .LBB2_2:
-; RV32IBP-NEXT:    not a1, a1
-; RV32IBP-NEXT:    sll a1, a1, a2
-; RV32IBP-NEXT:    addi a3, zero, 31
-; RV32IBP-NEXT:    sub a3, a3, a2
-; RV32IBP-NEXT:    srli a4, a0, 1
-; RV32IBP-NEXT:    srl a3, a4, a3
-; RV32IBP-NEXT:    or a1, a1, a3
-; RV32IBP-NEXT:    sll a2, a0, a2
-; RV32IBP-NEXT:  .LBB2_3:
-; RV32IBP-NEXT:    not a1, a1
-; RV32IBP-NEXT:    not a0, a2
-; RV32IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %shl = shl i64 %neg, %b
-  %neg1 = xor i64 %shl, -1
-  ret i64 %neg1
-}
-
-define i64 @slo_i64_mask(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: slo_i64_mask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a3, a2, 63
-; RV32I-NEXT:    addi a4, a3, -32
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    bltz a4, .LBB3_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a2, zero
-; RV32I-NEXT:    sll a1, a0, a4
-; RV32I-NEXT:    j .LBB3_3
-; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    sll a1, a1, a2
-; RV32I-NEXT:    addi a4, zero, 31
-; RV32I-NEXT:    sub a3, a4, a3
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    srl a3, a4, a3
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    sll a2, a0, a2
-; RV32I-NEXT:  .LBB3_3:
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    not a0, a2
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: slo_i64_mask:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    sll a1, a1, a2
-; RV32IB-NEXT:    andi a3, a2, 63
-; RV32IB-NEXT:    addi a4, zero, 31
-; RV32IB-NEXT:    sub a4, a4, a3
-; RV32IB-NEXT:    srli a5, a0, 1
-; RV32IB-NEXT:    srl a4, a5, a4
-; RV32IB-NEXT:    or a1, a1, a4
-; RV32IB-NEXT:    addi a3, a3, -32
-; RV32IB-NEXT:    sll a4, a0, a3
-; RV32IB-NEXT:    slti a5, a3, 0
-; RV32IB-NEXT:    cmov a1, a5, a1, a4
-; RV32IB-NEXT:    sll a0, a0, a2
-; RV32IB-NEXT:    srai a2, a3, 31
-; RV32IB-NEXT:    and a0, a2, a0
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: slo_i64_mask:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    andi a3, a2, 63
-; RV32IBP-NEXT:    addi a4, a3, -32
-; RV32IBP-NEXT:    not a0, a0
-; RV32IBP-NEXT:    bltz a4, .LBB3_2
-; RV32IBP-NEXT:  # %bb.1:
-; RV32IBP-NEXT:    mv a2, zero
-; RV32IBP-NEXT:    sll a1, a0, a4
-; RV32IBP-NEXT:    j .LBB3_3
-; RV32IBP-NEXT:  .LBB3_2:
-; RV32IBP-NEXT:    not a1, a1
-; RV32IBP-NEXT:    sll a1, a1, a2
-; RV32IBP-NEXT:    addi a4, zero, 31
-; RV32IBP-NEXT:    sub a3, a4, a3
-; RV32IBP-NEXT:    srli a4, a0, 1
-; RV32IBP-NEXT:    srl a3, a4, a3
-; RV32IBP-NEXT:    or a1, a1, a3
-; RV32IBP-NEXT:    sll a2, a0, a2
-; RV32IBP-NEXT:  .LBB3_3:
-; RV32IBP-NEXT:    not a1, a1
-; RV32IBP-NEXT:    not a0, a2
-; RV32IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %and = and i64 %b, 63
-  %shl = shl i64 %neg, %and
-  %neg1 = xor i64 %shl, -1
-  ret i64 %neg1
-}
-
-define i32 @sro_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: sro_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    srl a0, a0, a1
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sro_i32:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    sro a0, a0, a1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sro_i32:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    sro a0, a0, a1
-; RV32IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %shr = lshr i32 %neg, %b
-  %neg1 = xor i32 %shr, -1
-  ret i32 %neg1
-}
-
-define i32 @sro_i32_mask(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: sro_i32_mask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    srl a0, a0, a1
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sro_i32_mask:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    sro a0, a0, a1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sro_i32_mask:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    sro a0, a0, a1
-; RV32IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %and = and i32 %b, 31
-  %shr = lshr i32 %neg, %and
-  %neg1 = xor i32 %shr, -1
-  ret i32 %neg1
-}
-
-; As we are not matching directly i64 code patterns on RV32 some i64 patterns
-; don't have yet any matching bit manipulation instructions on RV32.
-; This test is presented here in case future expansions of the experimental-b
-; extension introduce instructions suitable for this pattern.
-
-define i64 @sro_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: sro_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi a3, a2, -32
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    bltz a3, .LBB6_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a2, zero
-; RV32I-NEXT:    srl a0, a1, a3
-; RV32I-NEXT:    j .LBB6_3
-; RV32I-NEXT:  .LBB6_2:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    srl a0, a0, a2
-; RV32I-NEXT:    addi a3, zero, 31
-; RV32I-NEXT:    sub a3, a3, a2
-; RV32I-NEXT:    slli a4, a1, 1
-; RV32I-NEXT:    sll a3, a4, a3
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    srl a2, a1, a2
-; RV32I-NEXT:  .LBB6_3:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    not a1, a2
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sro_i64:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    srl a0, a0, a2
-; RV32IB-NEXT:    addi a3, zero, 31
-; RV32IB-NEXT:    sub a3, a3, a2
-; RV32IB-NEXT:    slli a4, a1, 1
-; RV32IB-NEXT:    sll a3, a4, a3
-; RV32IB-NEXT:    or a0, a0, a3
-; RV32IB-NEXT:    addi a3, a2, -32
-; RV32IB-NEXT:    srl a4, a1, a3
-; RV32IB-NEXT:    slti a5, a3, 0
-; RV32IB-NEXT:    cmov a0, a5, a0, a4
-; RV32IB-NEXT:    srl a1, a1, a2
-; RV32IB-NEXT:    srai a2, a3, 31
-; RV32IB-NEXT:    and a1, a2, a1
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sro_i64:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    addi a3, a2, -32
-; RV32IBP-NEXT:    not a1, a1
-; RV32IBP-NEXT:    bltz a3, .LBB6_2
-; RV32IBP-NEXT:  # %bb.1:
-; RV32IBP-NEXT:    mv a2, zero
-; RV32IBP-NEXT:    srl a0, a1, a3
-; RV32IBP-NEXT:    j .LBB6_3
-; RV32IBP-NEXT:  .LBB6_2:
-; RV32IBP-NEXT:    not a0, a0
-; RV32IBP-NEXT:    srl a0, a0, a2
-; RV32IBP-NEXT:    addi a3, zero, 31
-; RV32IBP-NEXT:    sub a3, a3, a2
-; RV32IBP-NEXT:    slli a4, a1, 1
-; RV32IBP-NEXT:    sll a3, a4, a3
-; RV32IBP-NEXT:    or a0, a0, a3
-; RV32IBP-NEXT:    srl a2, a1, a2
-; RV32IBP-NEXT:  .LBB6_3:
-; RV32IBP-NEXT:    not a0, a0
-; RV32IBP-NEXT:    not a1, a2
-; RV32IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %shr = lshr i64 %neg, %b
-  %neg1 = xor i64 %shr, -1
-  ret i64 %neg1
-}
-
-define i64 @sro_i64_mask(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: sro_i64_mask:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a3, a2, 63
-; RV32I-NEXT:    addi a4, a3, -32
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    bltz a4, .LBB7_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a2, zero
-; RV32I-NEXT:    srl a0, a1, a4
-; RV32I-NEXT:    j .LBB7_3
-; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    srl a0, a0, a2
-; RV32I-NEXT:    addi a4, zero, 31
-; RV32I-NEXT:    sub a3, a4, a3
-; RV32I-NEXT:    slli a4, a1, 1
-; RV32I-NEXT:    sll a3, a4, a3
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    srl a2, a1, a2
-; RV32I-NEXT:  .LBB7_3:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    not a1, a2
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sro_i64_mask:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    srl a0, a0, a2
-; RV32IB-NEXT:    andi a3, a2, 63
-; RV32IB-NEXT:    addi a4, zero, 31
-; RV32IB-NEXT:    sub a4, a4, a3
-; RV32IB-NEXT:    slli a5, a1, 1
-; RV32IB-NEXT:    sll a4, a5, a4
-; RV32IB-NEXT:    or a0, a0, a4
-; RV32IB-NEXT:    addi a3, a3, -32
-; RV32IB-NEXT:    srl a4, a1, a3
-; RV32IB-NEXT:    slti a5, a3, 0
-; RV32IB-NEXT:    cmov a0, a5, a0, a4
-; RV32IB-NEXT:    srl a1, a1, a2
-; RV32IB-NEXT:    srai a2, a3, 31
-; RV32IB-NEXT:    and a1, a2, a1
-; RV32IB-NEXT:    not a0, a0
-; RV32IB-NEXT:    not a1, a1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sro_i64_mask:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    andi a3, a2, 63
-; RV32IBP-NEXT:    addi a4, a3, -32
-; RV32IBP-NEXT:    not a1, a1
-; RV32IBP-NEXT:    bltz a4, .LBB7_2
-; RV32IBP-NEXT:  # %bb.1:
-; RV32IBP-NEXT:    mv a2, zero
-; RV32IBP-NEXT:    srl a0, a1, a4
-; RV32IBP-NEXT:    j .LBB7_3
-; RV32IBP-NEXT:  .LBB7_2:
-; RV32IBP-NEXT:    not a0, a0
-; RV32IBP-NEXT:    srl a0, a0, a2
-; RV32IBP-NEXT:    addi a4, zero, 31
-; RV32IBP-NEXT:    sub a3, a4, a3
-; RV32IBP-NEXT:    slli a4, a1, 1
-; RV32IBP-NEXT:    sll a3, a4, a3
-; RV32IBP-NEXT:    or a0, a0, a3
-; RV32IBP-NEXT:    srl a2, a1, a2
-; RV32IBP-NEXT:  .LBB7_3:
-; RV32IBP-NEXT:    not a0, a0
-; RV32IBP-NEXT:    not a1, a2
-; RV32IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %and = and i64 %b, 63
-  %shr = lshr i64 %neg, %and
-  %neg1 = xor i64 %shr, -1
-  ret i64 %neg1
-}
-
-define i32 @sloi_i32(i32 %a) nounwind {
-; RV32I-LABEL: sloi_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    ori a0, a0, 1
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sloi_i32:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    sloi a0, a0, 1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sloi_i32:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    sloi a0, a0, 1
-; RV32IBP-NEXT:    ret
-  %neg = shl i32 %a, 1
-  %neg12 = or i32 %neg, 1
-  ret i32 %neg12
-}
-
-define i64 @sloi_i64(i64 %a) nounwind {
-; RV32I-LABEL: sloi_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a2, a0, 31
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    ori a0, a0, 1
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sloi_i64:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    fsri a1, a0, a1, 31
-; RV32IB-NEXT:    sloi a0, a0, 1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sloi_i64:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    srli a2, a0, 31
-; RV32IBP-NEXT:    slli a1, a1, 1
-; RV32IBP-NEXT:    or a1, a1, a2
-; RV32IBP-NEXT:    sloi a0, a0, 1
-; RV32IBP-NEXT:    ret
-  %neg = shl i64 %a, 1
-  %neg12 = or i64 %neg, 1
-  ret i64 %neg12
-}
-
-define i32 @sroi_i32(i32 %a) nounwind {
-; RV32I-LABEL: sroi_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a0, a0, 1
-; RV32I-NEXT:    lui a1, 524288
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sroi_i32:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    sroi a0, a0, 1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sroi_i32:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    sroi a0, a0, 1
-; RV32IBP-NEXT:    ret
-  %neg = lshr i32 %a, 1
-  %neg12 = or i32 %neg, -2147483648
-  ret i32 %neg12
-}
-
-define i64 @sroi_i64(i64 %a) nounwind {
-; RV32I-LABEL: sroi_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a2, a1, 31
-; RV32I-NEXT:    srli a0, a0, 1
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    srli a1, a1, 1
-; RV32I-NEXT:    lui a2, 524288
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    ret
-;
-; RV32IB-LABEL: sroi_i64:
-; RV32IB:       # %bb.0:
-; RV32IB-NEXT:    fsri a0, a0, a1, 1
-; RV32IB-NEXT:    sroi a1, a1, 1
-; RV32IB-NEXT:    ret
-;
-; RV32IBP-LABEL: sroi_i64:
-; RV32IBP:       # %bb.0:
-; RV32IBP-NEXT:    slli a2, a1, 31
-; RV32IBP-NEXT:    srli a0, a0, 1
-; RV32IBP-NEXT:    or a0, a0, a2
-; RV32IBP-NEXT:    sroi a1, a1, 1
-; RV32IBP-NEXT:    ret
-  %neg = lshr i64 %a, 1
-  %neg12 = or i64 %neg, -9223372036854775808
-  ret i64 %neg12
-}
-
 define i32 @gorc1_i32(i32 %a) nounwind {
 ; RV32I-LABEL: gorc1_i32:
 ; RV32I:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
index d2191afd5b79..685c1a0225ac 100644
--- a/llvm/test/CodeGen/RISCV/rv64Zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbp.ll
@@ -6,312 +6,6 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64IBP
 
-define signext i32 @slo_i32(i32 signext %a, i32 signext %b) nounwind {
-; RV64I-LABEL: slo_i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    sllw a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: slo_i32:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slow a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: slo_i32:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    slow a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %shl = shl i32 %neg, %b
-  %neg1 = xor i32 %shl, -1
-  ret i32 %neg1
-}
-
-define signext i32 @slo_i32_mask(i32 signext %a, i32 signext %b) nounwind {
-; RV64I-LABEL: slo_i32_mask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    sllw a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: slo_i32_mask:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slow a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: slo_i32_mask:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    slow a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %and = and i32 %b, 31
-  %shl = shl i32 %neg, %and
-  %neg1 = xor i32 %shl, -1
-  ret i32 %neg1
-}
-
-define i64 @slo_i64(i64 %a, i64 %b) nounwind {
-; RV64I-LABEL: slo_i64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    sll a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: slo_i64:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slo a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: slo_i64:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    slo a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %shl = shl i64 %neg, %b
-  %neg1 = xor i64 %shl, -1
-  ret i64 %neg1
-}
-
-define i64 @slo_i64_mask(i64 %a, i64 %b) nounwind {
-; RV64I-LABEL: slo_i64_mask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    sll a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: slo_i64_mask:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    slo a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: slo_i64_mask:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    slo a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %and = and i64 %b, 63
-  %shl = shl i64 %neg, %and
-  %neg1 = xor i64 %shl, -1
-  ret i64 %neg1
-}
-
-define signext i32 @sro_i32(i32 signext %a, i32 signext %b) nounwind {
-; RV64I-LABEL: sro_i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    srlw a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sro_i32:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    srow a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sro_i32:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    srow a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %shr = lshr i32 %neg, %b
-  %neg1 = xor i32 %shr, -1
-  ret i32 %neg1
-}
-
-define signext i32 @sro_i32_mask(i32 signext %a, i32 signext %b) nounwind {
-; RV64I-LABEL: sro_i32_mask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    srlw a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sro_i32_mask:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    srow a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sro_i32_mask:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    srow a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %and = and i32 %b, 31
-  %shr = lshr i32 %neg, %and
-  %neg1 = xor i32 %shr, -1
-  ret i32 %neg1
-}
-
-define i64 @sro_i64(i64 %a, i64 %b) nounwind {
-; RV64I-LABEL: sro_i64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    srl a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sro_i64:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    sro a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sro_i64:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    sro a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %shr = lshr i64 %neg, %b
-  %neg1 = xor i64 %shr, -1
-  ret i64 %neg1
-}
-
-define i64 @sro_i64_mask(i64 %a, i64 %b) nounwind {
-; RV64I-LABEL: sro_i64_mask:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    srl a0, a0, a1
-; RV64I-NEXT:    not a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sro_i64_mask:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    sro a0, a0, a1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sro_i64_mask:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    sro a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %and = and i64 %b, 63
-  %shr = lshr i64 %neg, %and
-  %neg1 = xor i64 %shr, -1
-  ret i64 %neg1
-}
-
-define signext i32 @sloi_i32(i32 signext %a) nounwind {
-; RV64I-LABEL: sloi_i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    ori a0, a0, 1
-; RV64I-NEXT:    sext.w a0, a0
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sloi_i32:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    sloiw a0, a0, 1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sloi_i32:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    sloiw a0, a0, 1
-; RV64IBP-NEXT:    ret
-  %neg = shl i32 %a, 1
-  %neg12 = or i32 %neg, 1
-  ret i32 %neg12
-}
-
-define i64 @sloi_i64(i64 %a) nounwind {
-; RV64I-LABEL: sloi_i64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    ori a0, a0, 1
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sloi_i64:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    sloi a0, a0, 1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sloi_i64:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    sloi a0, a0, 1
-; RV64IBP-NEXT:    ret
-  %neg = shl i64 %a, 1
-  %neg12 = or i64 %neg, 1
-  ret i64 %neg12
-}
-
-define signext i32 @sroi_i32(i32 signext %a) nounwind {
-; RV64I-LABEL: sroi_i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a0, a0, 1
-; RV64I-NEXT:    lui a1, 524288
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sroi_i32:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    sroiw a0, a0, 1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sroi_i32:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    sroiw a0, a0, 1
-; RV64IBP-NEXT:    ret
-  %neg = lshr i32 %a, 1
-  %neg12 = or i32 %neg, -2147483648
-  ret i32 %neg12
-}
-
-; This is similar to the type legalized version of sroiw but the mask is 0 in
-; the upper bits instead of 1 so the result is not sign extended. Make sure we
-; don't match it to sroiw.
-define i64 @sroiw_bug(i64 %a) nounwind {
-; RV64I-LABEL: sroiw_bug:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a0, a0, 1
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 31
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sroiw_bug:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    srli a0, a0, 1
-; RV64IB-NEXT:    bseti a0, a0, 31
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sroiw_bug:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    srli a0, a0, 1
-; RV64IBP-NEXT:    addi a1, zero, 1
-; RV64IBP-NEXT:    slli a1, a1, 31
-; RV64IBP-NEXT:    or a0, a0, a1
-; RV64IBP-NEXT:    ret
-  %neg = lshr i64 %a, 1
-  %neg12 = or i64 %neg, 2147483648
-  ret i64 %neg12
-}
-
-define i64 @sroi_i64(i64 %a) nounwind {
-; RV64I-LABEL: sroi_i64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a0, a0, 1
-; RV64I-NEXT:    addi a1, zero, -1
-; RV64I-NEXT:    slli a1, a1, 63
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    ret
-;
-; RV64IB-LABEL: sroi_i64:
-; RV64IB:       # %bb.0:
-; RV64IB-NEXT:    sroi a0, a0, 1
-; RV64IB-NEXT:    ret
-;
-; RV64IBP-LABEL: sroi_i64:
-; RV64IBP:       # %bb.0:
-; RV64IBP-NEXT:    sroi a0, a0, 1
-; RV64IBP-NEXT:    ret
-  %neg = lshr i64 %a, 1
-  %neg12 = or i64 %neg, -9223372036854775808
-  ret i64 %neg12
-}
-
 define signext i32 @gorc1_i32(i32 signext %a) nounwind {
 ; RV64I-LABEL: gorc1_i32:
 ; RV64I:       # %bb.0:
diff --git a/llvm/test/MC/RISCV/rv32zbp-invalid.s b/llvm/test/MC/RISCV/rv32zbp-invalid.s
index 6de719a250dd..11e7e8338377 100644
--- a/llvm/test/MC/RISCV/rv32zbp-invalid.s
+++ b/llvm/test/MC/RISCV/rv32zbp-invalid.s
@@ -1,19 +1,5 @@
 # RUN: not llvm-mc -triple riscv32 -mattr=+experimental-b,experimental-zbp < %s 2>&1 | FileCheck %s
 
-# Too few operands
-slo t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Too few operands
-sro t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Too few operands
-sloi t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Immediate operand out of range
-sloi t0, t1, 32 # CHECK: :[[@LINE]]:14: error: immediate must be an integer in the range [0, 31]
-sloi t0, t1, -1 # CHECK: :[[@LINE]]:14: error: immediate must be an integer in the range [0, 31]
-# Too few operands
-sroi t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Immediate operand out of range
-sroi t0, t1, 32 # CHECK: :[[@LINE]]:14: error: immediate must be an integer in the range [0, 31]
-sroi t0, t1, -1 # CHECK: :[[@LINE]]:14: error: immediate must be an integer in the range [0, 31]
 # Too few operands
 gorc t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 # Too few operands
@@ -54,10 +40,6 @@ xperm.n t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 xperm.b t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 # Too few operands
 xperm.h t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-slow t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set
-srow t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set
-sloiw t0, t1, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set
-sroiw t0, t1, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set
 gorcw t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set
 grevw t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set
 gorciw t0, t1, 0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set
diff --git a/llvm/test/MC/RISCV/rv32zbp-valid.s b/llvm/test/MC/RISCV/rv32zbp-valid.s
index 2e531343912b..0d7fc033c6a8 100644
--- a/llvm/test/MC/RISCV/rv32zbp-valid.s
+++ b/llvm/test/MC/RISCV/rv32zbp-valid.s
@@ -22,18 +22,6 @@
 # RUN:     | llvm-objdump --mattr=+experimental-zbp -d -r - \
 # RUN:     | FileCheck --check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s
 
-# CHECK-ASM-AND-OBJ: slo t0, t1, t2
-# CHECK-ASM: encoding: [0xb3,0x12,0x73,0x20]
-slo t0, t1, t2
-# CHECK-ASM-AND-OBJ: sro t0, t1, t2
-# CHECK-ASM: encoding: [0xb3,0x52,0x73,0x20]
-sro t0, t1, t2
-# CHECK-ASM-AND-OBJ: sloi t0, t1, 0
-# CHECK-ASM: encoding: [0x93,0x12,0x03,0x20]
-sloi t0, t1, 0
-# CHECK-ASM-AND-OBJ: sroi t0, t1, 0
-# CHECK-ASM: encoding: [0x93,0x52,0x03,0x20]
-sroi t0, t1, 0
 # CHECK-ASM-AND-OBJ: gorc t0, t1, t2
 # CHECK-ASM: encoding: [0xb3,0x52,0x73,0x28]
 gorc t0, t1, t2
diff --git a/llvm/test/MC/RISCV/rv64zbp-invalid.s b/llvm/test/MC/RISCV/rv64zbp-invalid.s
index 88adf2d47779..d5b37b2f8dab 100644
--- a/llvm/test/MC/RISCV/rv64zbp-invalid.s
+++ b/llvm/test/MC/RISCV/rv64zbp-invalid.s
@@ -1,19 +1,5 @@
 # RUN: not llvm-mc -triple riscv64 -mattr=+experimental-b,experimental-zbp < %s 2>&1 | FileCheck %s
 
-# Too few operands
-slow t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Too few operands
-srow t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Too few operands
-sloiw t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Immediate operand out of range
-sloiw t0, t1, 32 # CHECK: :[[@LINE]]:15: error: immediate must be an integer in the range [0, 31]
-sloiw t0, t1, -1 # CHECK: :[[@LINE]]:15: error: immediate must be an integer in the range [0, 31]
-# Too few operands
-sroiw t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
-# Immediate operand out of range
-sroiw t0, t1, 32 # CHECK: :[[@LINE]]:15: error: immediate must be an integer in the range [0, 31]
-sroiw t0, t1, -1 # CHECK: :[[@LINE]]:15: error: immediate must be an integer in the range [0, 31]
 # Too few operands
 gorcw t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 # Too few operands
diff --git a/llvm/test/MC/RISCV/rv64zbp-valid.s b/llvm/test/MC/RISCV/rv64zbp-valid.s
index 300464da325b..6647927c7150 100644
--- a/llvm/test/MC/RISCV/rv64zbp-valid.s
+++ b/llvm/test/MC/RISCV/rv64zbp-valid.s
@@ -12,18 +12,6 @@
 # RUN:     | llvm-objdump --mattr=+experimental-zbp -d -r - \
 # RUN:     | FileCheck --check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s
 
-# CHECK-ASM-AND-OBJ: slow t0, t1, t2
-# CHECK-ASM: encoding: [0xbb,0x12,0x73,0x20]
-slow t0, t1, t2
-# CHECK-ASM-AND-OBJ: srow t0, t1, t2
-# CHECK-ASM: encoding: [0xbb,0x52,0x73,0x20]
-srow t0, t1, t2
-# CHECK-ASM-AND-OBJ: sloiw t0, t1, 0
-# CHECK-ASM: encoding: [0x9b,0x12,0x03,0x20]
-sloiw t0, t1, 0
-# CHECK-ASM-AND-OBJ: sroiw t0, t1, 0
-# CHECK-ASM: encoding: [0x9b,0x52,0x03,0x20]
-sroiw t0, t1, 0
 # CHECK-ASM-AND-OBJ: gorcw t0, t1, t2
 # CHECK-ASM: encoding: [0xbb,0x52,0x73,0x28]
 gorcw t0, t1, t2

From 6aff13f9b05df8ab2ecebf6479204b17d8d8eed6 Mon Sep 17 00:00:00 2001
From: "Wang, Pengfei" <pengfei.wang@intel.com>
Date: Tue, 9 Feb 2021 21:12:59 +0800
Subject: [PATCH 106/244] [X86] Always assign reassoc flag for intrinsics
 *reduce_add/mul_ps/pd.

Intrinsics *reduce_add/mul_ps/pd assume that the elements in the vector
are reassociable, so we always need to set the reassoc flag when we emit
the _mm512_reduce_* intrinsics.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D96231

(cherry picked from commit dd2460ed5d77d908327ce29a15630cd3268bd76e)
---
 clang/lib/CodeGen/CGBuiltin.cpp              |  2 +
 clang/lib/Headers/avx512fintrin.h            | 16 +++--
 clang/test/CodeGen/X86/avx512-reduceIntrin.c | 68 +++++++++++---------
 3 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 113541bd5024..10e3820d9657 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13794,12 +13794,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_reduce_fadd_ps512: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+    // Limit reassoc to this call; the guard restores Builder's flags.
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.getFastMathFlags().setAllowReassoc(true);
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
   case X86::BI__builtin_ia32_reduce_fmul_pd512:
   case X86::BI__builtin_ia32_reduce_fmul_ps512: {
     Function *F =
         CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+    // Limit reassoc to this call; the guard restores Builder's flags.
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.getFastMathFlags().setAllowReassoc(true);
     return Builder.CreateCall(F, {Ops[0], Ops[1]});
   }
   case X86::BI__builtin_ia32_reduce_mul_d512:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 2ee4350b14d4..f226382cbb2c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9297,9 +9297,12 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
 
 /* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
  * outputs. This class of vector operation forms the basis of many scientific
- * computations. In vector-reduction arithmetic, the evaluation off is
+ * computations. In vector-reduction arithmetic, the evaluation order is
  * independent of the order of the input elements of V.
 
+ * For floating point types, we always assume the elements are reassociable even
+ * if -ffast-math is off.
+
  * Used bisection method. At each step, we partition the vector with previous
  * step in half, and the operation is performed on its two halves.
  * This takes log2(n) steps where n is the number of elements in the vector.
@@ -9345,8 +9348,11 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
   return __builtin_ia32_reduce_or_q512(__W);
 }
 
+// -0.0 is used to ignore the start value since it is the neutral value of
+// floating point addition. For more information, please refer to
+// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
-  return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
+  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
@@ -9356,7 +9362,7 @@ static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
   __W = _mm512_maskz_mov_pd(__M, __W);
-  return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
+  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512
@@ -9411,7 +9417,7 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
 
 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_add_ps(__m512 __W) {
-  return __builtin_ia32_reduce_fadd_ps512(0.0f, __W);
+  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
 }
 
 static __inline__ float __DEFAULT_FN_ATTRS512
@@ -9422,7 +9428,7 @@ _mm512_reduce_mul_ps(__m512 __W) {
 static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
   __W = _mm512_maskz_mov_ps(__M, __W);
-  return __builtin_ia32_reduce_fadd_ps512(0.0f, __W);
+  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
 }
 
 static __inline__ float __DEFAULT_FN_ATTRS512
diff --git a/clang/test/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CodeGen/X86/avx512-reduceIntrin.c
index d8a1130f3cef..62580ca1914e 100644
--- a/clang/test/CodeGen/X86/avx512-reduceIntrin.c
+++ b/clang/test/CodeGen/X86/avx512-reduceIntrin.c
@@ -11,13 +11,13 @@ long long test_mm512_reduce_add_epi64(__m512i __W){
 long long test_mm512_reduce_mul_epi64(__m512i __W){
 // CHECK-LABEL: @test_mm512_reduce_mul_epi64(
 // CHECK:    call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
-  return _mm512_reduce_mul_epi64(__W); 
+  return _mm512_reduce_mul_epi64(__W);
 }
 
 long long test_mm512_reduce_or_epi64(__m512i __W){
 // CHECK-LABEL: @test_mm512_reduce_or_epi64(
 // CHECK:    call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
-  return _mm512_reduce_or_epi64(__W); 
+  return _mm512_reduce_or_epi64(__W);
 }
 
 long long test_mm512_reduce_and_epi64(__m512i __W){
@@ -31,7 +31,7 @@ long long test_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W){
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK:    call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %{{.*}})
-  return _mm512_mask_reduce_add_epi64(__M, __W); 
+  return _mm512_mask_reduce_add_epi64(__M, __W);
 }
 
 long long test_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W){
@@ -39,7 +39,7 @@ long long test_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W){
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK:    call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
-  return _mm512_mask_reduce_mul_epi64(__M, __W); 
+  return _mm512_mask_reduce_mul_epi64(__M, __W);
 }
 
 long long test_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W){
@@ -47,7 +47,7 @@ long long test_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W){
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK:    call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %{{.*}})
-  return _mm512_mask_reduce_and_epi64(__M, __W); 
+  return _mm512_mask_reduce_and_epi64(__M, __W);
 }
 
 long long test_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W){
@@ -55,30 +55,30 @@ long long test_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W){
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
 // CHECK:    call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
-  return _mm512_mask_reduce_or_epi64(__M, __W); 
+  return _mm512_mask_reduce_or_epi64(__M, __W);
 }
 
 int test_mm512_reduce_add_epi32(__m512i __W){
 // CHECK-LABEL: @test_mm512_reduce_add_epi32(
 // CHECK:    call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
-  return _mm512_reduce_add_epi32(__W); 
+  return _mm512_reduce_add_epi32(__W);
 }
 
 int test_mm512_reduce_mul_epi32(__m512i __W){
 // CHECK-LABEL: @test_mm512_reduce_mul_epi32(
 // CHECK:    call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
-  return _mm512_reduce_mul_epi32(__W); 
+  return _mm512_reduce_mul_epi32(__W);
 }
 
 int test_mm512_reduce_or_epi32(__m512i __W){
 // CHECK:    call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
-  return _mm512_reduce_or_epi32(__W); 
+  return _mm512_reduce_or_epi32(__W);
 }
 
 int test_mm512_reduce_and_epi32(__m512i __W){
 // CHECK-LABEL: @test_mm512_reduce_and_epi32(
 // CHECK:    call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
-  return _mm512_reduce_and_epi32(__W); 
+  return _mm512_reduce_and_epi32(__W);
 }
 
 int test_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W){
@@ -86,7 +86,7 @@ int test_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W){
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK:    call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
-  return _mm512_mask_reduce_add_epi32(__M, __W); 
+  return _mm512_mask_reduce_add_epi32(__M, __W);
 }
 
 int test_mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W){
@@ -94,7 +94,7 @@ int test_mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W){
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK:    call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
-  return _mm512_mask_reduce_mul_epi32(__M, __W); 
+  return _mm512_mask_reduce_mul_epi32(__M, __W);
 }
 
 int test_mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W){
@@ -102,7 +102,7 @@ int test_mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W){
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK:    call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
-  return _mm512_mask_reduce_and_epi32(__M, __W); 
+  return _mm512_mask_reduce_and_epi32(__M, __W);
 }
 
 int test_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W){
@@ -110,61 +110,65 @@ int test_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W){
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
 // CHECK:    call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
-  return _mm512_mask_reduce_or_epi32(__M, __W); 
+  return _mm512_mask_reduce_or_epi32(__M, __W);
 }
 
-double test_mm512_reduce_add_pd(__m512d __W){
+double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
 // CHECK-LABEL: @test_mm512_reduce_add_pd(
-// CHECK:    call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> %{{.*}})
-  return _mm512_reduce_add_pd(__W); 
+// CHECK-NOT: reassoc
+// CHECK:    call reassoc double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
+// CHECK-NOT: reassoc
+  return _mm512_reduce_add_pd(__W) + ExtraAddOp;
 }
 
-double test_mm512_reduce_mul_pd(__m512d __W){
+double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
 // CHECK-LABEL: @test_mm512_reduce_mul_pd(
-// CHECK:    call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
-  return _mm512_reduce_mul_pd(__W); 
+// CHECK-NOT: reassoc
+// CHECK:    call reassoc double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
+// CHECK-NOT: reassoc
+  return _mm512_reduce_mul_pd(__W) * ExtraMulOp;
 }
 
 float test_mm512_reduce_add_ps(__m512 __W){
 // CHECK-LABEL: @test_mm512_reduce_add_ps(
-// CHECK:    call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %{{.*}})
-  return _mm512_reduce_add_ps(__W); 
+// CHECK:    call reassoc float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
+  return _mm512_reduce_add_ps(__W);
 }
 
 float test_mm512_reduce_mul_ps(__m512 __W){
 // CHECK-LABEL: @test_mm512_reduce_mul_ps(
-// CHECK:    call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
-  return _mm512_reduce_mul_ps(__W); 
+// CHECK:    call reassoc float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
+  return _mm512_reduce_mul_ps(__W);
 }
 
 double test_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_add_pd(
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK:    call double @llvm.vector.reduce.fadd.v8f64(double 0.000000e+00, <8 x double> %{{.*}})
-  return _mm512_mask_reduce_add_pd(__M, __W); 
+// CHECK:    call reassoc double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
+  return _mm512_mask_reduce_add_pd(__M, __W);
 }
 
 double test_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_mul_pd(
 // CHECK:    bitcast i8 %{{.*}} to <8 x i1>
 // CHECK:    select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
-// CHECK:    call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
-  return _mm512_mask_reduce_mul_pd(__M, __W); 
+// CHECK:    call reassoc double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
+  return _mm512_mask_reduce_mul_pd(__M, __W);
 }
 
 float test_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_add_ps(
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
-// CHECK:    call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %{{.*}})
-  return _mm512_mask_reduce_add_ps(__M, __W); 
+// CHECK:    call reassoc float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
+  return _mm512_mask_reduce_add_ps(__M, __W);
 }
 
 float test_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W){
 // CHECK-LABEL: @test_mm512_mask_reduce_mul_ps(
 // CHECK:    bitcast i16 %{{.*}} to <16 x i1>
 // CHECK:    select <16 x i1> %{{.*}}, <16 x float> {{.*}}, <16 x float> %{{.*}}
-// CHECK:    call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
-  return _mm512_mask_reduce_mul_ps(__M, __W); 
+// CHECK:    call reassoc float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
+  return _mm512_mask_reduce_mul_ps(__M, __W);
 }

From 23a40f7a595d5f07cb08d1d28987d7b4ca3ed766 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Mon, 8 Feb 2021 13:31:05 -0800
Subject: [PATCH 107/244] [Verifier] Allow DW_TAG_class_type/DW_TAG_union_type
 to have no filename

`clang/lib/CodeGen/CGOpenMPRuntime.cpp` synthesized union
(`distinct !DICompositeType(tag: DW_TAG_union_type, name: "kmp_cmplrdata_t", size: 64, elements: <0x62b690>)`)
does not have meaningful filename/line number.

D94735 dropped the previously arbitrary and untested filename/line from the union and caused a verifier error here.

This fixes `check-libarcher` failures.

Differential Revision: https://reviews.llvm.org/D96212

(cherry picked from commit ad60802a7187aa39b0374536be3fa176fe3d6256)
---
 llvm/lib/IR/Verifier.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 100e881c8fa8..6dd299ee9845 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1070,12 +1070,6 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
   if (auto *Params = N.getRawTemplateParams())
     visitTemplateParams(N, *Params);
 
-  if (N.getTag() == dwarf::DW_TAG_class_type ||
-      N.getTag() == dwarf::DW_TAG_union_type) {
-    AssertDI(N.getFile() && !N.getFile()->getFilename().empty(),
-             "class/union requires a filename", &N, N.getFile());
-  }
-
   if (auto *D = N.getRawDiscriminator()) {
     AssertDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
              "discriminator can only appear on variant part");

From 0d6859eb70c08704e71c844ea94c987f7b90f8d0 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Fri, 5 Feb 2021 20:33:56 +0800
Subject: [PATCH 108/244] Revert "[PowerPC] [Clang] Enable float128 feature on
 P9 by default"

Commit 6bf29dbb enables float128 feature by default for Power9 targets.
But float128 may cause build failure in libcxx testing. Revert this
commit first to unblock LLVM 12 release.

(cherry picked from commit 447dc856b243b99ce70019ba1187c39746f4e0e9)
---
 clang/lib/Basic/Targets/PPC.cpp            | 3 ---
 clang/test/Driver/ppc-f128-support-check.c | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index cfede6e6e756..ff09c0fa2a23 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -318,9 +318,6 @@ bool PPCTargetInfo::initFeatureMap(
                         .Case("pwr9", true)
                         .Case("pwr8", true)
                         .Default(false);
-  Features["float128"] = llvm::StringSwitch<bool>(CPU)
-                        .Case("pwr9", true)
-                        .Default(false);
 
   Features["spe"] = llvm::StringSwitch<bool>(CPU)
                         .Case("8548", true)
diff --git a/clang/test/Driver/ppc-f128-support-check.c b/clang/test/Driver/ppc-f128-support-check.c
index 2e4b7a7ae09c..24748905612f 100644
--- a/clang/test/Driver/ppc-f128-support-check.c
+++ b/clang/test/Driver/ppc-f128-support-check.c
@@ -1,7 +1,7 @@
 // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \
-// RUN:   -mcpu=pwr9 %s 2>&1 | FileCheck %s --check-prefix=HASF128
+// RUN:   -mcpu=pwr9 -mfloat128 %s 2>&1 | FileCheck %s --check-prefix=HASF128
 // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \
-// RUN:   -mcpu=power9 %s 2>&1 | FileCheck %s --check-prefix=HASF128
+// RUN:   -mcpu=power9 -mfloat128 %s 2>&1 | FileCheck %s --check-prefix=HASF128
 
 // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \
 // RUN:   -mcpu=pwr8 -mfloat128 %s 2>&1 | FileCheck %s --check-prefix=NOF128

From 075e2629b02d194313a069ce1ee9a8d40c7bd66f Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Wed, 27 Jan 2021 23:47:05 +0000
Subject: [PATCH 109/244] [ASTMatchers] Avoid pathological traversal over
 nested lambdas

Differential Revision: https://reviews.llvm.org/D95573

(cherry picked from commit 6f0df3cddb3e3f38df1baa7aa4d743a74bb46688)
---
 clang/include/clang/AST/RecursiveASTVisitor.h | 11 +++
 clang/lib/ASTMatchers/ASTMatchFinder.cpp      |  8 ++-
 .../ASTMatchers/ASTMatchersTraversalTest.cpp  | 72 +++++++++++++++++++
 3 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 505ea700fd0e..db2ef21f4364 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -186,6 +186,9 @@ template <typename Derived> class RecursiveASTVisitor {
   /// code, e.g., implicit constructors and destructors.
   bool shouldVisitImplicitCode() const { return false; }
 
+  /// Return whether this visitor should recurse into lambda body
+  bool shouldVisitLambdaBody() const { return true; }
+
   /// Return whether this visitor should traverse post-order.
   bool shouldTraversePostOrder() const { return false; }
 
@@ -2057,6 +2060,14 @@ bool RecursiveASTVisitor<Derived>::TraverseFunctionHelper(FunctionDecl *D) {
       // by clang.
       (!D->isDefaulted() || getDerived().shouldVisitImplicitCode());
 
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
+    if (const CXXRecordDecl *RD = MD->getParent()) {
+      if (RD->isLambda()) {
+        VisitBody = VisitBody && getDerived().shouldVisitLambdaBody();
+      }
+    }
+  }
+
   if (VisitBody) {
     TRY_TO(TraverseStmt(D->getBody())); // Function body.
   }
diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 8ddd3c87e09d..5034203840fc 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -556,9 +556,9 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
         if (LE->hasExplicitResultType())
           TraverseTypeLoc(Proto.getReturnLoc());
         TraverseStmt(LE->getTrailingRequiresClause());
-
-        TraverseStmt(LE->getBody());
       }
+
+      TraverseStmt(LE->getBody());
       return true;
     }
     return RecursiveASTVisitor<MatchASTVisitor>::dataTraverseNode(S, Queue);
@@ -697,6 +697,10 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
   bool shouldVisitTemplateInstantiations() const { return true; }
   bool shouldVisitImplicitCode() const { return true; }
 
+  // We visit the lambda body explicitly, so instruct the RAV
+  // to not visit it on our behalf too.
+  bool shouldVisitLambdaBody() const { return false; }
+
   bool IsMatchingInASTNodeNotSpelledInSource() const override {
     return TraversingASTNodeNotSpelledInSource;
   }
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index 92bf244b0e4a..8004599e01a2 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -3853,6 +3853,78 @@ void binop()
   }
 }
 
+TEST(IgnoringImpCasts, PathologicalLambda) {
+
+  // Test that deeply nested lambdas are not a performance penalty
+  StringRef Code = R"cpp(
+void f() {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+  [] {
+    int i = 42;
+    (void)i;
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+  }();
+}
+  )cpp";
+
+  EXPECT_TRUE(matches(Code, integerLiteral(equals(42))));
+  EXPECT_TRUE(matches(Code, functionDecl(hasDescendant(integerLiteral(equals(42))))));
+}
+
 TEST(IgnoringImpCasts, MatchesImpCasts) {
   // This test checks that ignoringImpCasts matches when implicit casts are
   // present and its inner matcher alone does not match.

From 94607512357da5c727d210cc34e642156429d19c Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Thu, 28 Jan 2021 23:40:16 +0000
Subject: [PATCH 110/244] Ensure that we traverse non-op() method bodies of
 lambdas

Differential Revision: https://reviews.llvm.org/D95644

(cherry picked from commit 43cc4f15008f8c700497d3d2b7020bfd29f5750f)
---
 clang/include/clang/AST/RecursiveASTVisitor.h |  3 +-
 .../ASTMatchers/ASTMatchersTraversalTest.cpp  | 36 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index db2ef21f4364..7870cea198a7 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2062,7 +2062,8 @@ bool RecursiveASTVisitor<Derived>::TraverseFunctionHelper(FunctionDecl *D) {
 
   if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
     if (const CXXRecordDecl *RD = MD->getParent()) {
-      if (RD->isLambda()) {
+      if (RD->isLambda() &&
+          declaresSameEntity(RD->getLambdaCallOperator(), MD)) {
         VisitBody = VisitBody && getDerived().shouldVisitLambdaBody();
       }
     }
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index 8004599e01a2..a3a09c426673 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -474,6 +474,42 @@ TEST(Matcher, CapturesThis) {
   EXPECT_TRUE(notMatches("void f() { int z = 3; [&z](){}; }", HasCaptureThis));
 }
 
+TEST(Matcher, MatchesMethodsOnLambda) {
+  StringRef Code = R"cpp(
+struct A {
+  ~A() {}
+};
+void foo()
+{
+  A a;
+  auto l = [a] { };
+  auto lCopy = l;
+  auto lPtrDecay = +[] { };
+  (void)lPtrDecay;
+}
+)cpp";
+
+  EXPECT_TRUE(matches(
+      Code, cxxConstructorDecl(
+                hasBody(compoundStmt()),
+                hasAncestor(lambdaExpr(hasAncestor(varDecl(hasName("l"))))),
+                isCopyConstructor())));
+  EXPECT_TRUE(matches(
+      Code, cxxConstructorDecl(
+                hasBody(compoundStmt()),
+                hasAncestor(lambdaExpr(hasAncestor(varDecl(hasName("l"))))),
+                isMoveConstructor())));
+  EXPECT_TRUE(matches(
+      Code, cxxDestructorDecl(
+                hasBody(compoundStmt()),
+                hasAncestor(lambdaExpr(hasAncestor(varDecl(hasName("l"))))))));
+  EXPECT_TRUE(matches(
+      Code, cxxConversionDecl(hasBody(compoundStmt(has(returnStmt(
+                                  hasReturnValue(implicitCastExpr()))))),
+                              hasAncestor(lambdaExpr(hasAncestor(
+                                  varDecl(hasName("lPtrDecay"))))))));
+}
+
 TEST(Matcher, isClassMessage) {
   EXPECT_TRUE(matchesObjC(
       "@interface NSString +(NSString *) stringWithFormat; @end "

From 8c24a88dee6426ffa98cd820f1c8f4803bec1d86 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Wed, 27 Jan 2021 22:03:23 +0000
Subject: [PATCH 111/244] [ASTMatchers] Fix traversal below range-for elements

Differential Revision: https://reviews.llvm.org/D95562

(cherry picked from commit 79125085f16540579d27c7e4987f63eef9c4aa23)
---
 clang/lib/ASTMatchers/ASTMatchFinder.cpp      | 30 ++++++---
 .../ASTMatchers/ASTMatchersTraversalTest.cpp  | 64 +++++++++++++++++++
 2 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 5034203840fc..89e83ee61574 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -243,10 +243,14 @@ class MatchChildASTVisitor
       return true;
     ScopedIncrement ScopedDepth(&CurrentDepth);
     if (auto *Init = Node->getInit())
-      if (!match(*Init))
+      if (!traverse(*Init))
         return false;
-    if (!match(*Node->getLoopVariable()) || !match(*Node->getRangeInit()) ||
-        !match(*Node->getBody()))
+    if (!match(*Node->getLoopVariable()))
+      return false;
+    if (match(*Node->getRangeInit()))
+      if (!VisitorBase::TraverseStmt(Node->getRangeInit()))
+        return false;
+    if (!match(*Node->getBody()))
       return false;
     return VisitorBase::TraverseStmt(Node->getBody());
   }
@@ -488,15 +492,21 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
 
   bool dataTraverseNode(Stmt *S, DataRecursionQueue *Queue) {
     if (auto *RF = dyn_cast<CXXForRangeStmt>(S)) {
-      for (auto *SubStmt : RF->children()) {
-        if (SubStmt == RF->getInit() || SubStmt == RF->getLoopVarStmt() ||
-            SubStmt == RF->getRangeInit() || SubStmt == RF->getBody()) {
-          TraverseStmt(SubStmt, Queue);
-        } else {
-          ASTNodeNotSpelledInSourceScope RAII(this, true);
-          TraverseStmt(SubStmt, Queue);
+      {
+        ASTNodeNotAsIsSourceScope RAII(this, true);
+        TraverseStmt(RF->getInit());
+        // Don't traverse under the loop variable
+        match(*RF->getLoopVariable());
+        TraverseStmt(RF->getRangeInit());
+      }
+      {
+        ASTNodeNotSpelledInSourceScope RAII(this, true);
+        for (auto *SubStmt : RF->children()) {
+          if (SubStmt != RF->getBody())
+            TraverseStmt(SubStmt);
         }
       }
+      TraverseStmt(RF->getBody());
       return true;
     } else if (auto *RBO = dyn_cast<CXXRewrittenBinaryOperator>(S)) {
       {
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index a3a09c426673..cbea274cecc9 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -2820,6 +2820,36 @@ struct CtorInitsNonTrivial : NonTrivial
     EXPECT_FALSE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M)));
   }
 
+  Code = R"cpp(
+  struct Range {
+    int* begin() const;
+    int* end() const;
+  };
+  Range getRange(int);
+
+  void rangeFor()
+  {
+    for (auto i : getRange(42))
+    {
+    }
+  }
+  )cpp";
+  {
+    auto M = integerLiteral(equals(42));
+    EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M)));
+    EXPECT_TRUE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M)));
+  }
+  {
+    auto M = callExpr(hasDescendant(integerLiteral(equals(42))));
+    EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M)));
+    EXPECT_TRUE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M)));
+  }
+  {
+    auto M = compoundStmt(hasDescendant(integerLiteral(equals(42))));
+    EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M)));
+    EXPECT_TRUE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M)));
+  }
+
   Code = R"cpp(
   void rangeFor()
   {
@@ -2891,6 +2921,40 @@ struct CtorInitsNonTrivial : NonTrivial
         matchesConditionally(Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
                              true, {"-std=c++20"}));
   }
+
+  Code = R"cpp(
+  struct Range {
+    int* begin() const;
+    int* end() const;
+  };
+  Range getRange(int);
+
+  int getNum(int);
+
+  void rangeFor()
+  {
+    for (auto j = getNum(42); auto i : getRange(j))
+    {
+    }
+  }
+  )cpp";
+  {
+    auto M = integerLiteral(equals(42));
+    EXPECT_TRUE(
+        matchesConditionally(Code, traverse(TK_AsIs, M), true, {"-std=c++20"}));
+    EXPECT_TRUE(
+        matchesConditionally(Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+                             true, {"-std=c++20"}));
+  }
+  {
+    auto M = compoundStmt(hasDescendant(integerLiteral(equals(42))));
+    EXPECT_TRUE(
+        matchesConditionally(Code, traverse(TK_AsIs, M), true, {"-std=c++20"}));
+    EXPECT_TRUE(
+        matchesConditionally(Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+                             true, {"-std=c++20"}));
+  }
+
   Code = R"cpp(
 void hasDefaultArg(int i, int j = 0)
 {

From aaf23abe9d57af638644cedbc1ca6132f140a57d Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Thu, 28 Jan 2021 13:12:43 +0000
Subject: [PATCH 112/244] Fix traversal with hasDescendant into lambdas

Differential Revision: https://reviews.llvm.org/D95607

(cherry picked from commit bb57a3422a09dcdd572ccb42767a0dabb5f966dd)
---
 clang/lib/ASTMatchers/ASTMatchFinder.cpp          |  2 +-
 .../ASTMatchers/ASTMatchersTraversalTest.cpp      | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 89e83ee61574..41be3738e707 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -295,7 +295,7 @@ class MatchChildASTVisitor
     if (!match(*Node->getBody()))
       return false;
 
-    return true;
+    return VisitorBase::TraverseStmt(Node->getBody());
   }
 
   bool shouldVisitTemplateInstantiations() const { return true; }
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index cbea274cecc9..c67c40ed960a 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -3220,6 +3220,12 @@ void func14() {
   float i = 42.0;
 }
 
+void func15() {
+  int count = 0;
+  auto l = [&] { ++count; };
+  (void)l;
+}
+
 )cpp";
 
   EXPECT_TRUE(
@@ -3404,6 +3410,15 @@ void func14() {
                functionDecl(hasName("func14"), hasDescendant(floatLiteral()))),
       langCxx20OrLater()));
 
+  EXPECT_TRUE(matches(
+      Code,
+      traverse(TK_IgnoreUnlessSpelledInSource,
+               compoundStmt(
+                   hasDescendant(varDecl(hasName("count")).bind("countVar")),
+                   hasDescendant(
+                       declRefExpr(to(varDecl(equalsBoundNode("countVar"))))))),
+      langCxx20OrLater()));
+
   Code = R"cpp(
 void foo() {
     int explicit_captured = 0;

From cd25aa9e409e961d64f5fb26bff3882f3a4db2d3 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Sat, 30 Jan 2021 15:46:08 +0000
Subject: [PATCH 113/244] [ASTMatchers] Fix definition of decompositionDecl

(cherry picked from commit b10d445307a0f3c7e5522836b4331090aacaf349)
---
 clang/include/clang/ASTMatchers/ASTMatchers.h | 2 +-
 clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 6f6dfab59a39..031fa4682c3a 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -344,7 +344,7 @@ extern const internal::VariadicAllOfMatcher<Decl> decl;
 ///   int number = 42;
 ///   auto [foo, bar] = std::make_pair{42, 42};
 /// \endcode
-extern const internal::VariadicAllOfMatcher<DecompositionDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, DecompositionDecl>
     decompositionDecl;
 
 /// Matches a declaration of a linkage specification.
diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index 6c7e14e3499a..705f1cdf3153 100644
--- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -732,7 +732,7 @@ const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasDecl> typeAliasDecl;
 const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasTemplateDecl>
     typeAliasTemplateDecl;
 const internal::VariadicAllOfMatcher<Decl> decl;
-const internal::VariadicAllOfMatcher<DecompositionDecl> decompositionDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, DecompositionDecl> decompositionDecl;
 const internal::VariadicDynCastAllOfMatcher<Decl, LinkageSpecDecl>
     linkageSpecDecl;
 const internal::VariadicDynCastAllOfMatcher<Decl, NamedDecl> namedDecl;

From e76f4385c2e1f95781584e0ac12a586dece8223a Mon Sep 17 00:00:00 2001
From: Stephen Kelly <steveire@gmail.com>
Date: Sat, 30 Jan 2021 01:36:40 +0000
Subject: [PATCH 114/244] [ASTMatchers] Fix matching after generic top-level
 matcher

With a matcher like

  expr(anyOf(integerLiteral(equals(42)), unless(expr())))

and code such as

  struct B {
    B(int);
  };

  B func1() { return 42; }

the top-level expr() would match each of the nodes which are not spelled
in the source and then ignore-traverse to match the integerLiteral node.
This would result in multiple results reported for the integerLiteral.

Fix that by only running matching logic on nodes which are not skipped
with the top-level matcher.

Differential Revision: https://reviews.llvm.org/D95735

(cherry picked from commit d6a06365cf12bebe20a7d65cf3894608efc089b4)
---
 clang/lib/ASTMatchers/ASTMatchFinder.cpp      |  8 +++
 .../ASTMatchers/ASTMatchersTraversalTest.cpp  | 72 +++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 41be3738e707..69957a952d17 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -837,6 +837,14 @@ class MatchASTVisitor : public RecursiveASTVisitor<MatchASTVisitor>,
       if (EnableCheckProfiling)
         Timer.setBucket(&TimeByBucket[MP.second->getID()]);
       BoundNodesTreeBuilder Builder;
+
+      {
+        TraversalKindScope RAII(getASTContext(), MP.first.getTraversalKind());
+        if (getASTContext().getParentMapContext().traverseIgnored(DynNode) !=
+            DynNode)
+          continue;
+      }
+
       if (MP.first.matches(DynNode, this, &Builder)) {
         MatchVisitor Visitor(ActiveASTContext, MP.second);
         Builder.visitMatches(&Visitor);
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
index c67c40ed960a..06c2bbc29e5c 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp
@@ -2519,6 +2519,78 @@ template<> bool timesTwo<bool>(bool){
     EXPECT_TRUE(matches(Code, traverse(TK_AsIs, M)));
     EXPECT_TRUE(matches(Code, traverse(TK_IgnoreUnlessSpelledInSource, M)));
   }
+
+  Code = R"cpp(
+struct B {
+  B(int);
+};
+
+B func1() { return 42; }
+  )cpp";
+  {
+    auto M = expr(ignoringImplicit(integerLiteral(equals(42)).bind("intLit")));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_AsIs, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+  }
+  {
+    auto M = expr(unless(integerLiteral(equals(24)))).bind("intLit");
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_AsIs, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 7)));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+  }
+  {
+    auto M =
+        expr(anyOf(integerLiteral(equals(42)).bind("intLit"), unless(expr())));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_AsIs, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+  }
+  {
+    auto M = expr(allOf(integerLiteral(equals(42)).bind("intLit"), expr()));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_AsIs, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+  }
+  {
+    auto M = expr(integerLiteral(equals(42)).bind("intLit"), expr());
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_AsIs, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+  }
+  {
+    auto M = expr(optionally(integerLiteral(equals(42)).bind("intLit")));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_AsIs, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("intLit", 1)));
+  }
+  {
+    auto M = expr().bind("allExprs");
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_AsIs, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("allExprs", 7)));
+    EXPECT_TRUE(matchAndVerifyResultTrue(
+        Code, traverse(TK_IgnoreUnlessSpelledInSource, M),
+        std::make_unique<VerifyIdIsBoundTo<Expr>>("allExprs", 1)));
+  }
 }
 
 TEST(Traversal, traverseNoImplicit) {

From 0c01bbf4e9d517f4c6422647e6d7c362b621ea42 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Fri, 29 Jan 2021 11:20:04 -0500
Subject: [PATCH 115/244] Revert "Disable rosegment for old Android versions."

This reverts commit fae16fc0eed7cf60207901818cfe040116f2ef00.
Breaks building compiler-rt android runtimes with trunk clang
but older NDK, see discussion on https://reviews.llvm.org/D95166

(cherry picked from commit 1608ba09462d877111230e9461b895f696f8fcb1)
---
 clang/lib/Driver/ToolChains/Linux.cpp |  9 ---------
 clang/test/Driver/linux-ld.c          | 14 --------------
 2 files changed, 23 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index e17a6bd4bdd2..9663a7390ada 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -236,15 +236,6 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
     ExtraOpts.push_back("relro");
   }
 
-  if (Triple.isAndroid() && Triple.isAndroidVersionLT(29)) {
-    // https://github.com/android/ndk/issues/1196
-    // The unwinder used by the crash handler on versions of Android prior to
-    // API 29 did not correctly handle binaries built with rosegment, which is
-    // enabled by default for LLD. Android only supports LLD, so it's not an
-    // issue that this flag is not accepted by other linkers.
-    ExtraOpts.push_back("--no-rosegment");
-  }
-
   // Android ARM/AArch64 use max-page-size=4096 to reduce VMA usage. Note, lld
   // from 11 onwards default max-page-size to 65536 for both ARM and AArch64.
   if ((Triple.isARM() || Triple.isAArch64()) && Triple.isAndroid()) {
diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c
index 0b788ffcb852..24d3c78643f8 100644
--- a/clang/test/Driver/linux-ld.c
+++ b/clang/test/Driver/linux-ld.c
@@ -1089,20 +1089,6 @@
 // CHECK-ANDROID-HASH-STYLE-M: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ANDROID-HASH-STYLE-M: "--hash-style=gnu"
 
-// Check that we pass --no-rosegment for pre-29 Android versions and do not for
-// 29+.
-// RUN: %clang %s -### -o %t.o 2>&1 \
-// RUN:     --target=armv7-linux-android28 \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-ROSEGMENT-28 %s
-// CHECK-ANDROID-ROSEGMENT-28: "{{.*}}ld{{(.exe)?}}"
-// CHECK-ANDROID-ROSEGMENT-28: "--no-rosegment"
-//
-// RUN: %clang %s -### -o %t.o 2>&1 \
-// RUN:     --target=armv7-linux-android29 \
-// RUN:   | FileCheck --check-prefix=CHECK-ANDROID-ROSEGMENT-29 %s
-// CHECK-ANDROID-ROSEGMENT-29: "{{.*}}ld{{(.exe)?}}"
-// CHECK-ANDROID-ROSEGMENT-29-NOT: "--no-rosegment"
-
 // RUN: %clang %s -### -o %t.o 2>&1 \
 // RUN:     --target=armv7-linux-android21 \
 // RUN:   | FileCheck --check-prefix=CHECK-ANDROID-NOEXECSTACK %s

From f290f3bfc5c900bd46992c9419c53a76f08a58cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Tue, 16 Feb 2021 15:00:54 +0200
Subject: [PATCH 116/244] doc: Add a release note for the changed comment char
 for aarch64-msvc targets

This was backported in a6ea391b832573830b011f26013ebaa946032250.
---
 llvm/docs/ReleaseNotes.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index f2eb53778406..c1bda3339a9e 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -111,6 +111,10 @@ During this release ...
   ``:lo12:`` relocation specifiers, to allow the assembly output
   to actually be assembled.
 
+* Changed the assembly comment string for MSVC targets to ``//`` (consistent
+  with the MinGW and ELF targets), freeing up ``;`` to be used as
+  statement separator.
+
 Changes to the ARM Backend
 --------------------------
 

From d5d089bf08c9181134d00e74d61a12f808c3e0c3 Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
Date: Fri, 12 Feb 2021 09:47:37 +0000
Subject: [PATCH 117/244] Fix exegesis build on aarch64-windows-msvc host

Include x86 intrinsics only when compiling for x86_64
or i386.  _MSC_VER no longer implies x86.

Reviewed By: gchatelet

Differential Revision: https://reviews.llvm.org/D96498

Fixes: https://bugs.llvm.org/show_bug.cgi?id=49149

(cherry picked from commit 06f53f2f095c45c93d269b5dc010af506f4b0ff4)
---
 llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index e8fb025f9611..15fa54e2f6a2 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -26,7 +26,7 @@
 #include <memory>
 #include <string>
 #include <vector>
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
 #include <immintrin.h>
 #include <intrin.h>
 #endif

From d44bf3332b314b8f325e6a04c268ae77691f8454 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 13 Feb 2021 11:33:14 +0000
Subject: [PATCH 118/244] [X86] Add reduced test case for PR49162

(cherry picked from commit 5ca3ef98a71598d368f6f4aaf0b385b50b67ce4a)
---
 llvm/test/CodeGen/X86/pr49162.ll | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr49162.ll

diff --git a/llvm/test/CodeGen/X86/pr49162.ll b/llvm/test/CodeGen/X86/pr49162.ll
new file mode 100644
index 000000000000..f186dc7dbe0b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr49162.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+define i32* @PR49162(i32* %base, i160* %ptr160) {
+; X86-LABEL: PR49162:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl 8(%eax), %ecx
+; X86-NEXT:    shll $16, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    shldl $16, %ecx, %eax
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: PR49162:
+; X64:       # %bb.0:
+; X64-NEXT:    leaq -4(%rdi), %rax
+; X64-NEXT:    retq
+  %load160 = load i160, i160* %ptr160, align 4
+  %shl = shl i160 %load160, 80
+  %ashr160 = ashr i160 %shl, 112
+  %trunc = trunc i160 %ashr160 to i64
+  %ashr64 = ashr i64 %trunc, 32
+  %gep = getelementptr inbounds i32, i32* %base, i64 %ashr64
+  ret i32* %gep
+}

From d9910c24fe195c69c68ed2d9ec18cf17a7d60dc7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 13 Feb 2021 11:59:52 +0000
Subject: [PATCH 119/244] [DAG] Fix shift amount limit in SimplifyDemandedBits
 trunc(shift(x,c)) to truncated bitwidth

We lost this in D56387/rG69bc0990a9181e6eb86228276d2f59435a7fae67 - where I got the src/dst bitwidths mixed up and assumed getValidShiftAmountConstant would catch it.

Patch by @craig.topper - confirmed by @Carrot that it fixes PR49162

(cherry picked from commit 7ad0c573bd4a68dc81886037457d47daa3d6aa24)
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +-
 llvm/test/CodeGen/X86/pr49162.ll                 | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5760132e44a0..7145fc91d5f3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2012,7 +2012,7 @@ bool TargetLowering::SimplifyDemandedBits(
 
         const APInt *ShAmtC =
             TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
-        if (!ShAmtC)
+        if (!ShAmtC || ShAmtC->uge(BitWidth))
           break;
         uint64_t ShVal = ShAmtC->getZExtValue();
 
diff --git a/llvm/test/CodeGen/X86/pr49162.ll b/llvm/test/CodeGen/X86/pr49162.ll
index f186dc7dbe0b..d3c187883b12 100644
--- a/llvm/test/CodeGen/X86/pr49162.ll
+++ b/llvm/test/CodeGen/X86/pr49162.ll
@@ -17,7 +17,11 @@ define i32* @PR49162(i32* %base, i160* %ptr160) {
 ;
 ; X64-LABEL: PR49162:
 ; X64:       # %bb.0:
-; X64-NEXT:    leaq -4(%rdi), %rax
+; X64-NEXT:    movl 8(%rsi), %eax
+; X64-NEXT:    shll $16, %eax
+; X64-NEXT:    cltq
+; X64-NEXT:    sarq $16, %rax
+; X64-NEXT:    leaq (%rdi,%rax,4), %rax
 ; X64-NEXT:    retq
   %load160 = load i160, i160* %ptr160, align 4
   %shl = shl i160 %load160, 80

From 439fd4bd6a757d2e18b60e502da523b8492f51ab Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 1 Feb 2021 18:17:25 +0000
Subject: [PATCH 120/244] [X86][AVX] Add 'OK' test cases for PR48877

(cherry picked from commit e9514429a02b1e4f8b9d54b28a934bfa9bd246ec)
---
 llvm/test/MC/Disassembler/X86/x86-64-avx.txt  | 19 +++++++++++++++++++
 llvm/test/MC/Disassembler/X86/x86-64-avx2.txt |  7 +++++++
 2 files changed, 26 insertions(+)
 create mode 100644 llvm/test/MC/Disassembler/X86/x86-64-avx.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/x86-64-avx2.txt

diff --git a/llvm/test/MC/Disassembler/X86/x86-64-avx.txt b/llvm/test/MC/Disassembler/X86/x86-64-avx.txt
new file mode 100644
index 000000000000..9ebb5335a22d
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/x86-64-avx.txt
@@ -0,0 +1,19 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: vpackusdw (%rax), %xmm2, %xmm1
+0xc4 0xe2 0x69 0x2b 0x08
+
+# CHECK: vphsubd %xmm3, %xmm2, %xmm11
+0xc4 0x62 0x69 0x06 0xdb
+
+# CHECK: vpcmpestri $100, %xmm3, %xmm11
+0xc4 0x63 0x79 0x61 0xdb 0x64
+
+# CHECK: vpcmpestrm $100, %xmm3, %xmm11
+0xc4 0x63 0x79 0x60 0xdb 0x64
+
+# CHECK: vpcmpistri $100, %xmm3, %xmm11
+0xc4 0x63 0x79 0x63 0xdb 0x64
+
+# CHECK: vpcmpistrm $100, %xmm3, %xmm11
+0xc4 0x63 0x79 0x62 0xdb 0x64
diff --git a/llvm/test/MC/Disassembler/X86/x86-64-avx2.txt b/llvm/test/MC/Disassembler/X86/x86-64-avx2.txt
new file mode 100644
index 000000000000..a6a87f100633
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/x86-64-avx2.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: vpackusdw %ymm0, %ymm2, %ymm1
+0xc4 0xe2 0x6d 0x2b 0xc8
+
+# CHECK: vphsubd %ymm0, %ymm2, %ymm1
+0xc4 0xe2 0x6d 0x06 0xc8

From fa9dc0c60cbc25b9fc4c6c5ff5f4ba81c7a612b4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 2 Feb 2021 10:53:28 +0000
Subject: [PATCH 121/244] [X86][AVX] Add missing VEX_WIG tags from
 VPACKUSDW/VPHSUBD/VPCMPISTRI/VPCMPISTRM/VPCMPESTRI/VPCMPESTRM

Fixes PR48877

Differential Revision: https://reviews.llvm.org/D95801

(cherry picked from commit 4d904776a77aa80342c65cf72a962920cc9d1fa9)
---
 llvm/lib/Target/X86/X86InstrSSE.td            | 16 ++++++++--------
 llvm/test/MC/Disassembler/X86/x86-64-avx.txt  | 18 ++++++++++++++++++
 llvm/test/MC/Disassembler/X86/x86-64-avx2.txt |  6 ++++++
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 7cf555748c46..a185a2007b72 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3778,7 +3778,7 @@ let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
                              VEX_4V, VEX_WIG;
   defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
                              i128mem, SchedWriteShuffle.XMM, load, 0>,
-                             VEX_4V;
+                             VEX_4V, VEX_WIG;
 }
 
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
@@ -3794,7 +3794,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
                               VEX_4V, VEX_L, VEX_WIG;
   defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
                               i256mem, SchedWriteShuffle.YMM, load, 0>,
-                              VEX_4V, VEX_L;
+                              VEX_4V, VEX_L, VEX_WIG;
 }
 
 let Constraints = "$src1 = $dst" in {
@@ -4756,7 +4756,7 @@ let isCommutable = 0 in {
                                   SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
   defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
                                   load, i128mem,
-                                  SchedWritePHAdd.XMM, 0>, VEX_4V;
+                                  SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
   defm VPSIGNB    : SS3I_binop_rm_int<0x08, "vpsignb",
                                       int_x86_ssse3_psign_b_128,
                                       SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
@@ -4802,7 +4802,7 @@ let isCommutable = 0 in {
                                   SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
   defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
                                   load, i256mem,
-                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
+                                  SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
   defm VPSIGNB   : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
                                        SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
   defm VPSIGNW   : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@@ -6503,7 +6503,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
 
 let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
+  defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
   defm PCMPISTRM  : pcmpistrm_SS42AI<"pcmpistrm"> ;
 }
 
@@ -6521,7 +6521,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
 
 let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX;
+  defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
   defm PCMPESTRM :  SS42AI_pcmpestrm<"pcmpestrm">;
 }
 
@@ -6539,7 +6539,7 @@ multiclass SS42AI_pcmpistri<string asm> {
 
 let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
+  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
   defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
 }
 
@@ -6557,7 +6557,7 @@ multiclass SS42AI_pcmpestri<string asm> {
 
 let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
   let Predicates = [HasAVX] in
-  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
+  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
   defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
 }
 
diff --git a/llvm/test/MC/Disassembler/X86/x86-64-avx.txt b/llvm/test/MC/Disassembler/X86/x86-64-avx.txt
index 9ebb5335a22d..b7ab0b1a7dc6 100644
--- a/llvm/test/MC/Disassembler/X86/x86-64-avx.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-64-avx.txt
@@ -3,17 +3,35 @@
 # CHECK: vpackusdw (%rax), %xmm2, %xmm1
 0xc4 0xe2 0x69 0x2b 0x08
 
+# CHECK: vpackusdw (%rax), %xmm2, %xmm1
+0xc4 0xe2 0xe9 0x2b 0x08
+
 # CHECK: vphsubd %xmm3, %xmm2, %xmm11
 0xc4 0x62 0x69 0x06 0xdb
 
+# CHECK: vphsubd %xmm3, %xmm2, %xmm11
+0xc4 0x62 0xe9 0x06 0xdb
+
 # CHECK: vpcmpestri $100, %xmm3, %xmm11
 0xc4 0x63 0x79 0x61 0xdb 0x64
 
+# CHECK: vpcmpestri $100, %xmm3, %xmm11
+0xc4 0x63 0xf9 0x61 0xdb 0x64
+
 # CHECK: vpcmpestrm $100, %xmm3, %xmm11
 0xc4 0x63 0x79 0x60 0xdb 0x64
 
+# CHECK: vpcmpestrm $100, %xmm3, %xmm11
+0xc4 0x63 0xf9 0x60 0xdb 0x64
+
 # CHECK: vpcmpistri $100, %xmm3, %xmm11
 0xc4 0x63 0x79 0x63 0xdb 0x64
 
+# CHECK: vpcmpistri $100, %xmm3, %xmm11
+0xc4 0x63 0xf9 0x63 0xdb 0x64
+
 # CHECK: vpcmpistrm $100, %xmm3, %xmm11
 0xc4 0x63 0x79 0x62 0xdb 0x64
+
+# CHECK: vpcmpistrm $100, %xmm3, %xmm11
+0xc4 0x63 0xf9 0x62 0xdb 0x64
diff --git a/llvm/test/MC/Disassembler/X86/x86-64-avx2.txt b/llvm/test/MC/Disassembler/X86/x86-64-avx2.txt
index a6a87f100633..d876067654fe 100644
--- a/llvm/test/MC/Disassembler/X86/x86-64-avx2.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-64-avx2.txt
@@ -3,5 +3,11 @@
 # CHECK: vpackusdw %ymm0, %ymm2, %ymm1
 0xc4 0xe2 0x6d 0x2b 0xc8
 
+# CHECK: vpackusdw %ymm0, %ymm2, %ymm1
+0xc4 0xe2 0xed 0x2b 0xc8
+
 # CHECK: vphsubd %ymm0, %ymm2, %ymm1
 0xc4 0xe2 0x6d 0x06 0xc8
+
+# CHECK: vphsubd %ymm0, %ymm2, %ymm1
+0xc4 0xe2 0xed 0x06 0xc8

From f23ee06ec27e991f9f1fbb646f3a2288aeedee71 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Tue, 2 Feb 2021 15:20:18 +0100
Subject: [PATCH 122/244] [clangd] Fix race in Global CDB shutdown

I believe the atomic write can be reordered after the notify, and that
seems to be happening on mac m1: http://45.33.8.238/macm1/2654/step_8.txt
In practice maybe seq_cst is enough? But no reason not to lock here.

https://bugs.llvm.org/show_bug.cgi?id=48998
(cherry picked from commit 6ac3fd9706047304c52a678884122a3a6bc55432)
---
 clang-tools-extra/clangd/GlobalCompilationDatabase.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
index 1a5379acfe7d..542d0c3e4dbc 100644
--- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
+++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
@@ -34,6 +34,7 @@
 #include <atomic>
 #include <chrono>
 #include <condition_variable>
+#include <mutex>
 #include <string>
 #include <tuple>
 #include <vector>
@@ -567,7 +568,10 @@ class DirectoryBasedGlobalCompilationDatabase::BroadcastThread {
   }
 
   ~BroadcastThread() {
-    ShouldStop.store(true, std::memory_order_release);
+    {
+      std::lock_guard<std::mutex> Lock(Mu);
+      ShouldStop.store(true, std::memory_order_release);
+    }
     CV.notify_all();
     Thread.join();
   }

From 440b16a4fc04a4bb614a60c26f42ab2ec27049a4 Mon Sep 17 00:00:00 2001
From: Jez Ng <jezng@fb.com>
Date: Tue, 2 Feb 2021 18:18:07 -0500
Subject: [PATCH 123/244] [lld-macho] Fill out release notes for 12.x

Differential Revision: https://reviews.llvm.org/D95900
---
 lld/docs/ReleaseNotes.rst | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index ea1403888eba..7c1cbc4a4c4b 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -58,10 +58,26 @@ MinGW Improvements
   (`D93950 <https://reviews.llvm.org/D93950>`_)
 
 
-MachO Improvements
+Mach-O Improvements
 ------------------
 
-* Item 1.
+We've gotten the new implementation of LLD for Mach-O to the point where it is
+able to link large x86_64 programs, and we'd love to get some alpha testing on
+it. The new Darwin back-end can be invoked as follows:
+
+.. code-block::
+   clang -fuse-ld=lld.darwinnew /path/to/file.c
+
+To reach this point, we implemented numerous features, and it's easier to list
+the major features we *haven't* yet completed:
+
+* LTO support
+* Stack unwinding for exceptions
+* Support for arm64, arm, and i386 architectures
+
+If you stumble upon an issue and it doesn't fall into one of these categories,
+please file a bug report!
+
 
 WebAssembly Improvements
 ------------------------

From 6584a9a4c55e10c055f9f450798b826a9624d82f Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood@arm.com>
Date: Mon, 8 Feb 2021 16:33:46 +0000
Subject: [PATCH 124/244] [release][docs] Update contributions to LLVM 12 for
 scalable vectors.

Differential Revision: https://reviews.llvm.org/D96270
---
 clang/docs/ReleaseNotes.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f4ca8a855142..a43cc33988ab 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -144,6 +144,18 @@ New Pragmas in Clang
 
 - ...
 
+Modified Pragmas in Clang
+-------------------------
+
+- The "#pragma clang loop vectorize_width" has been extended to support an
+  optional 'fixed|scalable' argument, which can be used to indicate that the
+  compiler should use fixed-width or scalable vectorization.  Fixed-width is
+  assumed by default.
+
+  Scalable or vector length agnostic vectorization is an experimental feature
+  for targets that support scalable vectors. For more information please refer
+  to the Clang Language Extensions documentation.
+
 Attribute Changes in Clang
 --------------------------
 

From dda7ef025bc66ea326f5a8bda8c5b8534d21c2dd Mon Sep 17 00:00:00 2001
From: Lei Huang <lei@ca.ibm.com>
Date: Fri, 19 Feb 2021 19:24:05 +0000
Subject: [PATCH 125/244] [PowerPC] Update release notes for changes to PowerPC
 for V12.0

---
 llvm/docs/ReleaseNotes.rst | 70 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index c1bda3339a9e..542a505bfd2e 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -129,7 +129,75 @@ During this release ...
 Changes to the PowerPC Target
 -----------------------------
 
-During this release ...
+Optimization:
+
+* Made improvements to loop unroll-and-jam including fix to respect user
+  provided #pragma unroll-and-jam for loops on targets other than ARM.
+* Improved PartialInliner allowing it to handle code regions in switch
+  statements.
+* Improved PGO support on AIX by building and linking with compiler-rt profile
+  library.
+* Add support for Epilogue Vectorization and enabled it by default.
+
+CodeGen:
+
+* POWER10 support
+  * Implementation of PC Relative addressing in LLD including the associated
+    linker optimizations.
+  * Add support for the new matrix multiplication (MMA) instructions to Clang
+    and LLVM.
+  * Implementation of Power10 builtins.
+
+* Scheduling enhancements
+  * Add a new algorithm to cluster more loads/stores if the DAG is not too
+    complicated.
+  * Enable the PowerPC scheduling heuristic for Power10.
+
+* Target dependent passes tuning
+  * Enhance LoopStrengthReduce/PPCLoopInstrFormPrep pass for PowerPC,
+    especially for P10 intrinsics.
+  * Enhance machine combiner pass to reduce register pressure for PowerPC.
+  * Improve MachineSink to do more sinking based on register pressure and alias
+    analysis.
+
+* General improvements
+  * Complete the constrained floating point operations support.
+  * Improve the llvm-exegesis support.
+  * Improve the stack clash protection to probe the gap between stackptr and
+    realigned stackptr.
+  * Improve the IEEE long double support for Power8.
+  * Enable MemorySSA for LoopSink.
+  * Enhance LLVM debugging functionality via options such as -print-changed and
+    -print-before-changed.
+  * Add builtins for Power9 (e.g. darn, xvtdiv, xvtsqrt).
+  * Add options to disable all or part of LoopIdiomRecognizePass.
+  * Add support for printing the DDG in DOT form allowing for visual inspection
+    of the Data Dependence Graph.
+  * Remove the QPX support.
+  * Significant number of bug fixes including all the fixes necessary to
+    achieve a clean test run for Julia.
+
+AIX Support:
+
+* Compiler-rt support
+  * Add support for building compiler-rt for AIX and 32-bit Power targets.
+  * Made compiler-rt the default rtlib for AIX.
+
+* General Improvements
+  * Enable the AIX extended AltiVec ABI under option -mabi=vec-extabi.
+  * Add partial C99 complex type support.
+  * Implement traceback table for functions (encodes vector information,
+    emits exception handling).
+  * Implement code generation for C++ dynamic initialization and finalization
+    of non-local variables for use with the -bcdtors option of the AIX linker.
+  * Add new option -mignore-xcoff-visibility.
+  * Enable explicit sections on AIX.
+  * Enable -f[no-]data-sections on AIX and set -fdata-sections to be the default
+    on AIX.
+  * Enable -f[no-]function-sections.
+  * Add support for relocation generation using the large code model.
+  * Add pragma align natural and sorted out pragma pack stack effect.
+
 
 Changes to the X86 Target
 -------------------------

From c2a0b0810a40199ec94c90539b601ba72bcb3523 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Thu, 18 Feb 2021 21:25:14 +0100
Subject: [PATCH 126/244] [DCE] Add tests for non-willreturn function being
 removed (NFC)

(cherry picked from commit 4045ad6b0ccd35fe990d51b9bfdd9e7de109bdf5)
---
 llvm/test/Transforms/ADCE/willreturn.ll | 17 +++++++++++++++++
 llvm/test/Transforms/BDCE/willreturn.ll | 17 +++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 llvm/test/Transforms/ADCE/willreturn.ll
 create mode 100644 llvm/test/Transforms/BDCE/willreturn.ll

diff --git a/llvm/test/Transforms/ADCE/willreturn.ll b/llvm/test/Transforms/ADCE/willreturn.ll
new file mode 100644
index 000000000000..c3482a417cb0
--- /dev/null
+++ b/llvm/test/Transforms/ADCE/willreturn.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -adce -S < %s | FileCheck %s
+
+declare void @may_not_return(i32) nounwind readnone
+declare void @will_return(i32) nounwind readnone willreturn
+
+; FIXME: This is a miscompile.
+define void @test(i32 %a) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    ret void
+;
+  %b = add i32 %a, 1
+  call void @may_not_return(i32 %b)
+  %c = add i32 %b, 1
+  call void @will_return(i32 %c)
+  ret void
+}
diff --git a/llvm/test/Transforms/BDCE/willreturn.ll b/llvm/test/Transforms/BDCE/willreturn.ll
new file mode 100644
index 000000000000..b87ab0050e7a
--- /dev/null
+++ b/llvm/test/Transforms/BDCE/willreturn.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -bdce -S < %s | FileCheck %s
+
+declare void @may_not_return(i32) nounwind readnone
+declare void @will_return(i32) nounwind readnone willreturn
+
+; FIXME: This is a miscompile.
+define void @test(i32 %a) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    ret void
+;
+  %b = add i32 %a, 1
+  call void @may_not_return(i32 %b)
+  %c = add i32 %b, 1
+  call void @will_return(i32 %c)
+  ret void
+}

From d1d7dc779a296001568d855bba7843a9eb94a585 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Thu, 18 Feb 2021 22:15:17 +0100
Subject: [PATCH 127/244] [IR] Move willReturn() to Instruction

This moves the willReturn() helper from CallBase to Instruction,
so that it can be used in a more generic manner. This will make
it easier to fix additional passes (ADCE and BDCE), and will give
us one place to change if additional instructions should become
non-willreturn (e.g. there has been talk about handling volatile
operations this way).

I have also included the IntrinsicInst workaround directly in
here, so that it gets applied consistently. (As such this change
is not entirely NFC -- FuncAttrs will now use this as well.)

Differential Revision: https://reviews.llvm.org/D96992

(cherry picked from commit 370addb996138a9e3634899cf264c7621307617a)
---
 llvm/include/llvm/IR/InstrTypes.h         |  3 ---
 llvm/include/llvm/IR/Instruction.h        |  4 ++++
 llvm/lib/Analysis/ValueTracking.cpp       | 28 +++--------------------
 llvm/lib/IR/Instruction.cpp               | 10 ++++++++
 llvm/lib/Transforms/IPO/FunctionAttrs.cpp |  3 +--
 llvm/lib/Transforms/Utils/Local.cpp       |  9 ++------
 6 files changed, 20 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index f42ef48de6b3..955ac8e537fe 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1757,9 +1757,6 @@ class CallBase : public Instruction {
     return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
   }
 
-  /// Returns true if this function is guaranteed to return.
-  bool willReturn() const { return hasFnAttr(Attribute::WillReturn); }
-
   void setOnlyReadsMemory() {
     addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
   }
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index d2a55f89fac9..85afaed5225e 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -633,6 +633,10 @@ class Instruction : public User,
   /// generated program.
   bool isSafeToRemove() const;
 
+  /// Return true if the instruction will return (unwinding is considered as
+  /// a form of returning control flow here).
+  bool willReturn() const;
+
   /// Return true if the instruction is a variety of EH-block.
   bool isEHPad() const {
     switch (getOpcode()) {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5600a3b33750..e174c5efe424 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5018,36 +5018,14 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
   // arbitrary length of time, but programs aren't allowed to rely on that.
 
   // If there is no successor, then execution can't transfer to it.
-  if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
-    return !CRI->unwindsToCaller();
-  if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
-    return !CatchSwitch->unwindsToCaller();
-  if (isa<ResumeInst>(I))
-    return false;
   if (isa<ReturnInst>(I))
     return false;
   if (isa<UnreachableInst>(I))
     return false;
 
-  // Calls can throw, or contain an infinite loop, or kill the process.
-  if (const auto *CB = dyn_cast<CallBase>(I)) {
-    // Call sites that throw have implicit non-local control flow.
-    if (!CB->doesNotThrow())
-      return false;
-
-    // A function which doens't throw and has "willreturn" attribute will
-    // always return.
-    if (CB->hasFnAttr(Attribute::WillReturn))
-      return true;
-
-    // FIXME: Temporarily assume that all side-effect free intrinsics will
-    // return. Remove this workaround once all intrinsics are appropriately
-    // annotated.
-    return isa<IntrinsicInst>(CB) && CB->onlyReadsMemory();
-  }
-
-  // Other instructions return normally.
-  return true;
+  // An instruction that returns without throwing must transfer control flow
+  // to a successor.
+  return !I->mayThrow() && I->willReturn();
 }
 
 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 1e3fcd672a43..246180e72172 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -633,6 +633,16 @@ bool Instruction::isSafeToRemove() const {
          !this->isTerminator();
 }
 
+bool Instruction::willReturn() const {
+  if (const auto *CB = dyn_cast<CallBase>(this))
+    // FIXME: Temporarily assume that all side-effect free intrinsics will
+    // return. Remove this workaround once all intrinsics are appropriately
+    // annotated.
+    return CB->hasFnAttr(Attribute::WillReturn) ||
+           (isa<IntrinsicInst>(CB) && CB->onlyReadsMemory());
+  return true;
+}
+
 bool Instruction::isLifetimeStartOrEnd() const {
   auto II = dyn_cast<IntrinsicInst>(this);
   if (!II)
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 30a1f81ad0e1..256acd7e1d17 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1445,8 +1445,7 @@ static bool functionWillReturn(const Function &F) {
   // If there are no loops, then the function is willreturn if all calls in
   // it are willreturn.
   return all_of(instructions(F), [](const Instruction &I) {
-    const auto *CB = dyn_cast<CallBase>(&I);
-    return !CB || CB->hasFnAttr(Attribute::WillReturn);
+    return I.willReturn();
   });
 }
 
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 477ea458c763..d055f3dd3084 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -420,13 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
     return true;
   }
 
-  if (auto *CB = dyn_cast<CallBase>(I)) {
-    // Treat calls that may not return as alive.
-    // TODO: Remove the intrinsic escape hatch once all intrinsics set
-    // willreturn properly.
-    if (!CB->willReturn() && !isa<IntrinsicInst>(I))
-      return false;
-  }
+  if (!I->willReturn())
+    return false;
 
   if (!I->mayHaveSideEffects())
     return true;

From 8e9c2ad95eb5ab439b933d8c793957bc4d82e456 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Thu, 18 Feb 2021 22:29:19 +0100
Subject: [PATCH 128/244] [DCE] Don't remove non-willreturn calls

In both ADCE and BDCE (via DemandedBits) we should not remove
instructions that are not guaranteed to return. This issue was
pointed out by fhahn in the recent llvm-dev thread.

Differential Revision: https://reviews.llvm.org/D96993

(cherry picked from commit 2f17ed294fcd8cde505b93c9c5bbab06ba59051c)
---
 llvm/lib/Analysis/DemandedBits.cpp         | 2 +-
 llvm/lib/Transforms/Scalar/ADCE.cpp        | 2 +-
 llvm/test/Feature/OperandBundles/adce.ll   | 4 ++--
 llvm/test/LTO/X86/parallel.ll              | 4 ++--
 llvm/test/Transforms/ADCE/dce_pure_call.ll | 2 +-
 llvm/test/Transforms/ADCE/willreturn.ll    | 3 ++-
 llvm/test/Transforms/BDCE/dce-pure.ll      | 2 +-
 llvm/test/Transforms/BDCE/dead-void-ro.ll  | 2 +-
 llvm/test/Transforms/BDCE/willreturn.ll    | 3 ++-
 llvm/test/tools/gold/X86/parallel.ll       | 4 ++--
 10 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 461fd7239905..dd11b0b02bf8 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -80,7 +80,7 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
 
 static bool isAlwaysLive(Instruction *I) {
   return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
-         I->mayHaveSideEffects();
+         I->mayHaveSideEffects() || !I->willReturn();
 }
 
 void DemandedBits::determineLiveOperandBits(
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 2b649732a799..ce4e5e575fbf 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -325,7 +325,7 @@ void AggressiveDeadCodeElimination::initialize() {
 
 bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
   // TODO -- use llvm::isInstructionTriviallyDead
-  if (I.isEHPad() || I.mayHaveSideEffects()) {
+  if (I.isEHPad() || I.mayHaveSideEffects() || !I.willReturn()) {
     // Skip any value profile instrumentation calls if they are
     // instrumenting constants.
     if (isInstrumentsConstant(I))
diff --git a/llvm/test/Feature/OperandBundles/adce.ll b/llvm/test/Feature/OperandBundles/adce.ll
index a729ba710689..fa4e045fdd1e 100644
--- a/llvm/test/Feature/OperandBundles/adce.ll
+++ b/llvm/test/Feature/OperandBundles/adce.ll
@@ -5,8 +5,8 @@
 ; bundles since the presence of unknown operand bundles implies
 ; arbitrary memory effects.
 
-declare void @readonly_function() readonly nounwind
-declare void @readnone_function() readnone nounwind
+declare void @readonly_function() readonly nounwind willreturn
+declare void @readnone_function() readnone nounwind willreturn
 
 define void @test0() {
 ; CHECK-LABEL: @test0(
diff --git a/llvm/test/LTO/X86/parallel.ll b/llvm/test/LTO/X86/parallel.ll
index b3c128193821..34235ec0202b 100644
--- a/llvm/test/LTO/X86/parallel.ll
+++ b/llvm/test/LTO/X86/parallel.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK0-NOT: bar
 ; CHECK0: T foo
 ; CHECK0-NOT: bar
-define void @foo() {
+define void @foo() mustprogress {
   call void @bar()
   ret void
 }
@@ -19,7 +19,7 @@ define void @foo() {
 ; CHECK1-NOT: foo
 ; CHECK1: T bar
 ; CHECK1-NOT: foo
-define void @bar() {
+define void @bar() mustprogress {
   call void @foo()
   ret void
 }
diff --git a/llvm/test/Transforms/ADCE/dce_pure_call.ll b/llvm/test/Transforms/ADCE/dce_pure_call.ll
index 66483abbc919..88e92bf13f49 100644
--- a/llvm/test/Transforms/ADCE/dce_pure_call.ll
+++ b/llvm/test/Transforms/ADCE/dce_pure_call.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -adce -S < %s | not grep call
 
-declare i32 @strlen(i8*) readonly nounwind
+declare i32 @strlen(i8*) readonly nounwind willreturn
 
 define void @test() {
 	call i32 @strlen( i8* null )		; <i32>:1 [#uses=0]
diff --git a/llvm/test/Transforms/ADCE/willreturn.ll b/llvm/test/Transforms/ADCE/willreturn.ll
index c3482a417cb0..61bbbe0ae5fa 100644
--- a/llvm/test/Transforms/ADCE/willreturn.ll
+++ b/llvm/test/Transforms/ADCE/willreturn.ll
@@ -4,9 +4,10 @@
 declare void @may_not_return(i32) nounwind readnone
 declare void @will_return(i32) nounwind readnone willreturn
 
-; FIXME: This is a miscompile.
 define void @test(i32 %a) {
 ; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[A:%.*]], 1
+; CHECK-NEXT:    call void @may_not_return(i32 [[B]])
 ; CHECK-NEXT:    ret void
 ;
   %b = add i32 %a, 1
diff --git a/llvm/test/Transforms/BDCE/dce-pure.ll b/llvm/test/Transforms/BDCE/dce-pure.ll
index a487a04db611..e00121d0c9e9 100644
--- a/llvm/test/Transforms/BDCE/dce-pure.ll
+++ b/llvm/test/Transforms/BDCE/dce-pure.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -bdce -S < %s | FileCheck %s
 ; RUN: opt -passes=bdce -S < %s | FileCheck %s
 
-declare i32 @strlen(i8*) readonly nounwind
+declare i32 @strlen(i8*) readonly nounwind willreturn
 
 define void @test1() {
   call i32 @strlen( i8* null )
diff --git a/llvm/test/Transforms/BDCE/dead-void-ro.ll b/llvm/test/Transforms/BDCE/dead-void-ro.ll
index 36f09511503b..77f4e097f4bb 100644
--- a/llvm/test/Transforms/BDCE/dead-void-ro.ll
+++ b/llvm/test/Transforms/BDCE/dead-void-ro.ll
@@ -14,5 +14,5 @@ define void @PR34211(i16* %p) {
 
 declare void @no_side_effects_so_dead(i16) #0
 
-attributes #0 = { nounwind readnone }
+attributes #0 = { nounwind readnone willreturn }
 
diff --git a/llvm/test/Transforms/BDCE/willreturn.ll b/llvm/test/Transforms/BDCE/willreturn.ll
index b87ab0050e7a..5efd6ad6e0cf 100644
--- a/llvm/test/Transforms/BDCE/willreturn.ll
+++ b/llvm/test/Transforms/BDCE/willreturn.ll
@@ -4,9 +4,10 @@
 declare void @may_not_return(i32) nounwind readnone
 declare void @will_return(i32) nounwind readnone willreturn
 
-; FIXME: This is a miscompile.
 define void @test(i32 %a) {
 ; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[B:%.*]] = add i32 [[A:%.*]], 1
+; CHECK-NEXT:    call void @may_not_return(i32 [[B]])
 ; CHECK-NEXT:    ret void
 ;
   %b = add i32 %a, 1
diff --git a/llvm/test/tools/gold/X86/parallel.ll b/llvm/test/tools/gold/X86/parallel.ll
index 6972efc652a3..b8072f01e5a2 100644
--- a/llvm/test/tools/gold/X86/parallel.ll
+++ b/llvm/test/tools/gold/X86/parallel.ll
@@ -14,7 +14,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK0-NOT: bar
 ; CHECK0: T foo
 ; CHECK0-NOT: bar
-define void @foo() {
+define void @foo() mustprogress {
   call void @bar()
   ret void
 }
@@ -24,7 +24,7 @@ define void @foo() {
 ; CHECK1-NOT: foo
 ; CHECK1: T bar
 ; CHECK1-NOT: foo
-define void @bar() {
+define void @bar() mustprogress {
   call void @foo()
   ret void
 }

From 17daef8bfdfd3a78465122f968a93df6db42dca6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Fri, 19 Feb 2021 13:06:45 +0100
Subject: [PATCH 129/244] [LLD] Fix tests after D96993

We now need mustprogress to eliminate these calls. The code doesn't
really make sense, but that's not the point of the test...

(cherry picked from commit ac065b7a37d6dd8daacd526f6c3a0d1563bc88ac)
---
 lld/test/ELF/lto/parallel.ll  | 4 ++--
 lld/test/wasm/lto/parallel.ll | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lld/test/ELF/lto/parallel.ll b/lld/test/ELF/lto/parallel.ll
index d9cb4fed7bfa..d89431e8b4a1 100644
--- a/lld/test/ELF/lto/parallel.ll
+++ b/lld/test/ELF/lto/parallel.ll
@@ -14,7 +14,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK0-NOT: bar
 ; CHECK0: T foo
 ; CHECK0-NOT: bar
-define void @foo() {
+define void @foo() mustprogress {
   call void @bar()
   ret void
 }
@@ -22,7 +22,7 @@ define void @foo() {
 ; CHECK1-NOT: foo
 ; CHECK1: T bar
 ; CHECK1-NOT: foo
-define void @bar() {
+define void @bar() mustprogress {
   call void @foo()
   ret void
 }
diff --git a/lld/test/wasm/lto/parallel.ll b/lld/test/wasm/lto/parallel.ll
index a93c3558d969..261cf2ef7dae 100644
--- a/lld/test/wasm/lto/parallel.ll
+++ b/lld/test/wasm/lto/parallel.ll
@@ -10,7 +10,7 @@ target triple = "wasm32-unknown-unknown-wasm"
 ; CHECK0-NOT: bar
 ; CHECK0: T foo
 ; CHECK0-NOT: bar
-define void @foo() {
+define void @foo() mustprogress {
   call void @bar()
   ret void
 }
@@ -18,7 +18,7 @@ define void @foo() {
 ; CHECK1-NOT: foo
 ; CHECK1: T bar
 ; CHECK1-NOT: foo
-define void @bar() {
+define void @bar() mustprogress {
   call void @foo()
   ret void
 }

From a338d577bb4fbf9013cf0c22c211d25bf3c41a26 Mon Sep 17 00:00:00 2001
From: Jeroen Dobbelaere <jeroen.dobbelaere@synopsys.com>
Date: Thu, 18 Feb 2021 17:29:46 +0100
Subject: [PATCH 130/244] [clang] functions with the 'const' or 'pure'
 attribute must always return.

As described in
* https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-pure-function-attribute
* https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-const-function-attribute

An `__attribute__((pure))` function must always return, as well as an `__attribute__((const))` function.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D96960

(cherry picked from commit 46757ccb49ab88da54ca8ddd43665d5255ee80f7)
---
 clang/lib/CodeGen/CGCall.cpp                           | 5 +++++
 clang/test/CodeGen/complex-builtins.c                  | 3 ++-
 clang/test/CodeGen/complex-libcalls.c                  | 3 ++-
 clang/test/CodeGen/function-attributes.c               | 2 +-
 clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp | 8 ++++----
 clang/test/Sema/libbuiltins-ctype-powerpc64.c          | 2 +-
 clang/test/Sema/libbuiltins-ctype-x86_64.c             | 2 +-
 7 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 42801372189b..bc7582c67989 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1995,9 +1995,14 @@ void CodeGenModule::ConstructAttributeList(
     if (TargetDecl->hasAttr<ConstAttr>()) {
       FuncAttrs.addAttribute(llvm::Attribute::ReadNone);
       FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+      // gcc specifies that 'const' functions have greater restrictions than
+      // 'pure' functions, so they also cannot have infinite loops.
+      FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
     } else if (TargetDecl->hasAttr<PureAttr>()) {
       FuncAttrs.addAttribute(llvm::Attribute::ReadOnly);
       FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+      // gcc specifies that 'pure' functions cannot have infinite loops.
+      FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
     } else if (TargetDecl->hasAttr<NoAliasAttr>()) {
       FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly);
       FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
diff --git a/clang/test/CodeGen/complex-builtins.c b/clang/test/CodeGen/complex-builtins.c
index 96c0e7117016..6fea8a9f028c 100644
--- a/clang/test/CodeGen/complex-builtins.c
+++ b/clang/test/CodeGen/complex-builtins.c
@@ -133,7 +133,7 @@ void foo(float f) {
 // NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cproj(double, double) [[READNONE:#[0-9]+]]
 // HAS_ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[WILLRETURN_NOT_READNONE:#[0-9]+]]
 
   __builtin_cpow(f,f);       __builtin_cpowf(f,f);      __builtin_cpowl(f,f);
 
@@ -202,3 +202,4 @@ void foo(float f) {
 
 // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
 // HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+// HAS_ERRNO: attributes [[WILLRETURN_NOT_READNONE]] = { nounwind willreturn {{.*}} }
diff --git a/clang/test/CodeGen/complex-libcalls.c b/clang/test/CodeGen/complex-libcalls.c
index 9bd419a83821..44d6849c0a71 100644
--- a/clang/test/CodeGen/complex-libcalls.c
+++ b/clang/test/CodeGen/complex-libcalls.c
@@ -133,7 +133,7 @@ void foo(float f) {
 // NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cproj(double, double) [[READNONE:#[0-9]+]]
 // HAS_ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[WILLRETURN_NOT_READNONE:#[0-9]+]]
 
   cpow(f,f);       cpowf(f,f);      cpowl(f,f);
 
@@ -202,3 +202,4 @@ void foo(float f) {
 
 // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} }
 // HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} }
+// HAS_ERRNO: attributes [[WILLRETURN_NOT_READNONE]] = { nounwind willreturn {{.*}} }
diff --git a/clang/test/CodeGen/function-attributes.c b/clang/test/CodeGen/function-attributes.c
index ffb86a6cd272..f14f24801006 100644
--- a/clang/test/CodeGen/function-attributes.c
+++ b/clang/test/CodeGen/function-attributes.c
@@ -115,5 +115,5 @@ void f20(void) {
 // CHECK: attributes [[SR]] = { nounwind optsize{{.*}} "stackrealign"{{.*}} }
 // CHECK: attributes [[RT]] = { nounwind optsize returns_twice{{.*}} }
 // CHECK: attributes [[NR]] = { noreturn optsize }
-// CHECK: attributes [[NUW_RN]] = { nounwind optsize readnone }
+// CHECK: attributes [[NUW_RN]] = { nounwind optsize readnone willreturn }
 // CHECK: attributes [[RT_CALL]] = { optsize returns_twice }
diff --git a/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp b/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp
index 25400a552e5d..e1d539608fcc 100644
--- a/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp
+++ b/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp
@@ -15,8 +15,8 @@ int f(void) {
 // CHECK: declare i32 @_Z1tv() [[TF2:#[0-9]+]]
 
 // CHECK: attributes [[TF]] = { {{.*}} }
-// CHECK: attributes [[NUW_RN]] = { nounwind readnone{{.*}} }
-// CHECK: attributes [[NUW_RO]] = { nounwind readonly{{.*}} }
+// CHECK: attributes [[NUW_RN]] = { nounwind readnone willreturn{{.*}} }
+// CHECK: attributes [[NUW_RO]] = { nounwind readonly willreturn{{.*}} }
 // CHECK: attributes [[TF2]] = { {{.*}} }
-// CHECK: attributes [[NUW_RN_CALL]] = { nounwind readnone }
-// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly }
+// CHECK: attributes [[NUW_RN_CALL]] = { nounwind readnone willreturn }
+// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn }
diff --git a/clang/test/Sema/libbuiltins-ctype-powerpc64.c b/clang/test/Sema/libbuiltins-ctype-powerpc64.c
index bfd79acb0ab0..ba0efb205944 100644
--- a/clang/test/Sema/libbuiltins-ctype-powerpc64.c
+++ b/clang/test/Sema/libbuiltins-ctype-powerpc64.c
@@ -62,4 +62,4 @@ void test(int x) {
 // CHECK: declare signext i32 @toupper(i32 signext) [[NUW_RO:#[0-9]+]]
 
 // CHECK: attributes [[NUW_RO]] = { nounwind readonly{{.*}} }
-// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly }
+// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn }
diff --git a/clang/test/Sema/libbuiltins-ctype-x86_64.c b/clang/test/Sema/libbuiltins-ctype-x86_64.c
index 4934e6f16752..b8a2c7e81584 100644
--- a/clang/test/Sema/libbuiltins-ctype-x86_64.c
+++ b/clang/test/Sema/libbuiltins-ctype-x86_64.c
@@ -62,4 +62,4 @@ void test(int x) {
 // CHECK: declare i32 @toupper(i32) [[NUW_RO:#[0-9]+]]
 
 // CHECK: attributes [[NUW_RO]] = { nounwind readonly{{.*}} }
-// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly }
+// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn }

From 2f74c22048277d255078d376b55dd40dddbaa376 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 18 Feb 2021 21:04:32 -0500
Subject: [PATCH 131/244] [OpenMP][NVPTX] Add the support for CUDA 11.2 and
 CUDA 11.1

CUDA 11.2 and CUDA 11.1 are both available now.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D97004

(cherry picked from commit 89827fd404f954605663776e746ec351bde61348)
---
 openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index b705e0bb6a9f..5478cd3f6aea 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -152,8 +152,8 @@ add_custom_target(omptarget-nvptx-bc)
 
 # This map is from clang/lib/Driver/ToolChains/Cuda.cpp.
 # The last element is the default case.
-set(cuda_version_list 110 102 101 100 92 91 90 80)
-set(ptx_feature_list 70 65 64 63 61 61 60 42)
+set(cuda_version_list 112 111 110 102 101 100 92 91 90 80)
+set(ptx_feature_list 71 71 70 65 64 63 61 61 60 42)
 # The following two lines of ugly code is not needed when the minimal CMake
 # version requirement is 3.17+.
 list(LENGTH cuda_version_list num_version_supported)

From 34e8fd50391923ec4d81ec988376588885107071 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Fri, 22 Jan 2021 15:20:52 +0100
Subject: [PATCH 132/244] [clangd] Treat "null" optional fields as missing

Clangd currently throws away any protocol messages whenever an optional
field has an unexpected type. This patch changes the behaviour to treat
`null` fields as missing.

This makes clangd more tolerant of small violations of the
LSP spec.

Fixes https://github.com/clangd/vscode-clangd/issues/134

Differential Revision: https://reviews.llvm.org/D95229

(cherry picked from commit af20232b8e189335da571f48c2467b244b7fd772)
---
 clang-tools-extra/clangd/Protocol.cpp | 46 ++++++++++++++++++---------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 78110dc0de60..76cf813e6808 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -27,6 +27,21 @@
 
 namespace clang {
 namespace clangd {
+namespace {
+
+// Helper that doesn't treat `null` and absent fields as failures.
+template <typename T>
+bool mapOptOrNull(const llvm::json::Value &Params, llvm::StringLiteral Prop,
+                  T &Out, llvm::json::Path P) {
+  auto *O = Params.getAsObject();
+  assert(O);
+  auto *V = O->get(Prop);
+  // Field is missing or null.
+  if (!V || V->getAsNull().hasValue())
+    return true;
+  return fromJSON(*V, Out, P.field(Prop));
+}
+} // namespace
 
 char LSPError::ID;
 
@@ -490,7 +505,7 @@ bool fromJSON(const llvm::json::Value &Params, DidChangeTextDocumentParams &R,
   return O && O.map("textDocument", R.textDocument) &&
          O.map("contentChanges", R.contentChanges) &&
          O.map("wantDiagnostics", R.wantDiagnostics) &&
-         O.mapOptional("forceRebuild", R.forceRebuild);
+         mapOptOrNull(Params, "forceRebuild", R.forceRebuild, P);
 }
 
 bool fromJSON(const llvm::json::Value &E, FileChangeType &Out,
@@ -580,10 +595,10 @@ bool fromJSON(const llvm::json::Value &Params, Diagnostic &R,
               llvm::json::Path P) {
   llvm::json::ObjectMapper O(Params, P);
   return O && O.map("range", R.range) && O.map("message", R.message) &&
-         O.mapOptional("severity", R.severity) &&
-         O.mapOptional("category", R.category) &&
-         O.mapOptional("code", R.code) && O.mapOptional("source", R.source);
-  return true;
+         mapOptOrNull(Params, "severity", R.severity, P) &&
+         mapOptOrNull(Params, "category", R.category, P) &&
+         mapOptOrNull(Params, "code", R.code, P) &&
+         mapOptOrNull(Params, "source", R.source, P);
 }
 
 llvm::json::Value toJSON(const PublishDiagnosticsParams &PDP) {
@@ -818,7 +833,7 @@ bool fromJSON(const llvm::json::Value &Params, CompletionContext &R,
   llvm::json::ObjectMapper O(Params, P);
   int TriggerKind;
   if (!O || !O.map("triggerKind", TriggerKind) ||
-      !O.mapOptional("triggerCharacter", R.triggerCharacter))
+      !mapOptOrNull(Params, "triggerCharacter", R.triggerCharacter, P))
     return false;
   R.triggerKind = static_cast<CompletionTriggerKind>(TriggerKind);
   return true;
@@ -1121,8 +1136,8 @@ bool fromJSON(const llvm::json::Value &Params, ConfigurationSettings &S,
   llvm::json::ObjectMapper O(Params, P);
   if (!O)
     return true; // 'any' type in LSP.
-  return O.mapOptional("compilationDatabaseChanges",
-                       S.compilationDatabaseChanges);
+  return mapOptOrNull(Params, "compilationDatabaseChanges",
+                      S.compilationDatabaseChanges, P);
 }
 
 bool fromJSON(const llvm::json::Value &Params, InitializationOptions &Opts,
@@ -1133,8 +1148,8 @@ bool fromJSON(const llvm::json::Value &Params, InitializationOptions &Opts,
 
   return fromJSON(Params, Opts.ConfigSettings, P) &&
          O.map("compilationDatabasePath", Opts.compilationDatabasePath) &&
-         O.mapOptional("fallbackFlags", Opts.fallbackFlags) &&
-         O.mapOptional("clangdFileStatus", Opts.FileStatus);
+         mapOptOrNull(Params, "fallbackFlags", Opts.fallbackFlags, P) &&
+         mapOptOrNull(Params, "clangdFileStatus", Opts.FileStatus, P);
 }
 
 bool fromJSON(const llvm::json::Value &E, TypeHierarchyDirection &Out,
@@ -1190,10 +1205,11 @@ bool fromJSON(const llvm::json::Value &Params, TypeHierarchyItem &I,
   return O && O.map("name", I.name) && O.map("kind", I.kind) &&
          O.map("uri", I.uri) && O.map("range", I.range) &&
          O.map("selectionRange", I.selectionRange) &&
-         O.mapOptional("detail", I.detail) &&
-         O.mapOptional("deprecated", I.deprecated) &&
-         O.mapOptional("parents", I.parents) &&
-         O.mapOptional("children", I.children) && O.mapOptional("data", I.data);
+         mapOptOrNull(Params, "detail", I.detail, P) &&
+         mapOptOrNull(Params, "deprecated", I.deprecated, P) &&
+         mapOptOrNull(Params, "parents", I.parents, P) &&
+         mapOptOrNull(Params, "children", I.children, P) &&
+         mapOptOrNull(Params, "data", I.data, P);
 }
 
 bool fromJSON(const llvm::json::Value &Params,
@@ -1238,7 +1254,7 @@ bool fromJSON(const llvm::json::Value &Params, CallHierarchyItem &I,
   return O && O.map("name", I.name) && O.map("kind", I.kind) &&
          O.map("uri", I.uri) && O.map("range", I.range) &&
          O.map("selectionRange", I.selectionRange) &&
-         O.mapOptional("data", I.data);
+         mapOptOrNull(Params, "data", I.data, P);
 }
 
 bool fromJSON(const llvm::json::Value &Params,

From b1106a5b3bc94f6da11682007d101823f81bad30 Mon Sep 17 00:00:00 2001
From: Simonas Kazlauskas <git@kazlauskas.me>
Date: Tue, 16 Feb 2021 13:35:32 -0800
Subject: [PATCH 133/244] [llvm-dwp] Join dwo paths correctly when DWOPath is
 absolute

When the `DWOPath` is absolute, we want to use `DWOPath` as is, without prepending any other
components to the path. `sys::path::append` does not join the paths, but rather unconditionally
appends them, so something like `sys::path::append("/tmp", "/tmp/banana")` will result in
`/tmp/tmp/banana` rather than the desired `/tmp/banana`.

This then causes `llvm-dwp` to fail in the following situation:

```
$ clang -gsplit-dwarf /tmp/banana/test.c -c -o /tmp/outdir/foo.o
$ clang outdir/foo.o -o outdir/hm
$ llvm-dwarfdump outdir/hm | grep -C2 foo.dwo
                  DW_AT_comp_dir    ("/tmp")
                  DW_AT_GNU_pubnames  (true)
                  DW_AT_GNU_dwo_name    ("/tmp/outdir/foo.dwo")
                                DW_AT_GNU_dwo_id    (0xde4d396f3bf0e257)
                  DW_AT_low_pc  (0x0000000000401100)
$ strace -o trace llvm-dwp -e outdir/hm -o outdir/hm.dwp
error: No such file or directory
$ cat trace | grep foo.dwo
openat(AT_FDCWD, "/tmp/tmp/outdir/foo.dwo", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
```

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D96678

(cherry picked from commit 6ffcb2937c96bd0d7a55b984b5eb8f381b68e322)
---
 .../tools/llvm-dwp/X86/absolute_paths.test    | 37 +++++++++++++++++++
 llvm/tools/llvm-dwp/llvm-dwp.cpp              |  4 +-
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/tools/llvm-dwp/X86/absolute_paths.test

diff --git a/llvm/test/tools/llvm-dwp/X86/absolute_paths.test b/llvm/test/tools/llvm-dwp/X86/absolute_paths.test
new file mode 100644
index 000000000000..1e3d27e7323b
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/absolute_paths.test
@@ -0,0 +1,37 @@
+; RUN: rm -rf %t
+; RUN: mkdir -p %t
+; RUN: llc %s -mtriple=x86_64-linux --split-dwarf-file=%t/test.dwo --split-dwarf-output=%t/test.dwo --filetype=obj -o %t/test.o
+; RUN: llvm-dwarfdump -v %t/test.dwo | FileCheck %s -DPATH=%t
+; RUN: llvm-dwp -e %t/test.o -o %t/test.dwp
+; RUN: llvm-dwarfdump -v %t/test.dwp | FileCheck %s -DPATH=%t
+
+; CHECK-LABEL: .debug_abbrev.dwo contents:
+; CHECK: DW_AT_name
+; CHECK: DW_AT_GNU_dwo_name
+; CHECK: DW_AT_name
+; CHECK-LABEL: .debug_str.dwo contents:
+; CHECK: "banana"
+; CHECK: "/tmp/test.c"
+; CHECK: "[[PATH]]/test.dwo"
+
+define void @banana() !dbg !8 {
+  ret void, !dbg !12
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.1", isOptimized: true, runtimeVersion: 0, splitDebugFilename: "test.dwo", emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: GNU)
+!1 = !DIFile(filename: "/tmp/test.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 11.0.1"}
+!8 = distinct !DISubprogram(name: "banana", scope: !9, file: !9, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!9 = !DIFile(filename: "test.c", directory: "/tmp")
+!10 = !DISubroutineType(types: !11)
+!11 = !{null}
+!12 = !DILocation(line: 1, column: 20, scope: !8)
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 9aed3526b0aa..d495bd3d4cab 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -526,8 +526,8 @@ getDWOFilenames(StringRef ExecFilename) {
     std::string DWOCompDir =
         dwarf::toString(Die.find(dwarf::DW_AT_comp_dir), "");
     if (!DWOCompDir.empty()) {
-      SmallString<16> DWOPath;
-      sys::path::append(DWOPath, DWOCompDir, DWOName);
+      SmallString<16> DWOPath(std::move(DWOName));
+      sys::fs::make_absolute(DWOCompDir, DWOPath);
       DWOPaths.emplace_back(DWOPath.data(), DWOPath.size());
     } else {
       DWOPaths.push_back(std::move(DWOName));

From 0d4f8a3f394f55b5fde7033bf009e5dacea1a775 Mon Sep 17 00:00:00 2001
From: Georgii Rymar <grimar@accesssoftek.com>
Date: Thu, 28 Jan 2021 16:35:18 +0300
Subject: [PATCH 134/244] [llvm-symbolizer] - Fix the crash in GNU output style
 with --no-inlines and missing input file.

Fixes https://bugs.llvm.org/show_bug.cgi?id=48882.

If the input file does not exist (or has a reading error), the
following code will crash if there are two or more input addresses.

```
auto ResOrErr = Symbolizer.symbolizeInlinedCode(
  ModuleName, {Offset, object::SectionedAddress::UndefSection});
Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
```

For the first address, `symbolizeInlinedCode` returns an error.
For the second address, `symbolizeInlinedCode` returns an empty result
(not an error) and `.getFrame(0)` will crash.

Differential revision: https://reviews.llvm.org/D95609

(cherry picked from commit d22140687500f90830fe416d9c1e317f7c4535d5)
---
 .../llvm-symbolizer/output-style-inlined.test | 21 +++++++++++++++++++
 .../tools/llvm-symbolizer/llvm-symbolizer.cpp |  7 ++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test
index 7e9f7e7ce180..1b8e3a2f22fb 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test
+++ b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test
@@ -28,3 +28,24 @@ RUN:   | FileCheck %s --check-prefix=LLVM --implicit-check-not=inctwo
 
 LLVM: main
 GNU: inctwo
+
+## Check that we are able to produce an output properly when the --no-inlines option
+## is specified, but a file doesn't exist. Check we report an error.
+
+RUN: llvm-symbolizer --output-style=GNU --obj=%p/Inputs/not.exist 0x1 0x2 --no-inlines 2>&1 \
+RUN:   | FileCheck %s --check-prefix=NOT-EXIST-GNU -DMSG=%errc_ENOENT
+RUN: llvm-symbolizer --output-style=LLVM --obj=%p/Inputs/not.exist 0x1 0x2 --no-inlines 2>&1 \
+RUN:   | FileCheck %s --check-prefix=NOT-EXIST-LLVM -DMSG=%errc_ENOENT
+
+# NOT-EXIST-GNU:      LLVMSymbolizer: error reading file: [[MSG]]
+# NOT-EXIST-GNU-NEXT: ??
+# NOT-EXIST-GNU-NEXT: ??:0
+# NOT-EXIST-GNU-NEXT: ??
+# NOT-EXIST-GNU-NEXT: ??:0
+
+# NOT-EXIST-LLVM:       LLVMSymbolizer: error reading file: [[MSG]]
+# NOT-EXIST-LLVM-NEXT:  ??
+# NOT-EXIST-LLVM-NEXT:  ??:0:0
+# NOT-EXIST-LLVM-EMPTY:
+# NOT-EXIST-LLVM-NEXT:  ??
+# NOT-EXIST-LLVM-NEXT:  ??:0:0
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 9c68acee0ae2..8734c2d74045 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -181,7 +181,12 @@ static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
     // the topmost function, which suits our needs better.
     auto ResOrErr = Symbolizer.symbolizeInlinedCode(
         ModuleName, {Offset, object::SectionedAddress::UndefSection});
-    Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
+    if (!ResOrErr || ResOrErr->getNumberOfFrames() == 0) {
+      error(ResOrErr);
+      Printer << DILineInfo();
+    } else {
+      Printer << ResOrErr->getFrame(0);
+    }
   } else {
     auto ResOrErr = Symbolizer.symbolizeCode(
         ModuleName, {Offset, object::SectionedAddress::UndefSection});

From d3f9f512a47f10d27a9e32edaaa7513a64b0ec17 Mon Sep 17 00:00:00 2001
From: "William S. Moses" <gh@wsmoses.com>
Date: Mon, 1 Feb 2021 18:16:17 -0500
Subject: [PATCH 135/244] [SROA] Propagate correct TBAA/TBAA Struct offsets

SROA does not correctly account for offsets in TBAA/TBAA struct metadata.
This patch creates functionality for generating new MD with the corresponding
offset and updates SROA to use this functionality.

Differential Revision: https://reviews.llvm.org/D95826

(cherry picked from commit 40862b1a7486a969ff044cd240aad24f4183cc10)
---
 llvm/include/llvm/IR/Metadata.h              |  18 +++
 llvm/include/llvm/IR/Operator.h              |   5 +
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp |  81 ++++++++++++
 llvm/lib/IR/Operator.cpp                     |  20 ++-
 llvm/lib/Transforms/Scalar/SROA.cpp          |  38 ++++--
 llvm/test/Transforms/SROA/basictest.ll       | 128 +++++++++++--------
 llvm/test/Transforms/SROA/tbaa-struct2.ll    |  51 ++++++++
 7 files changed, 269 insertions(+), 72 deletions(-)
 create mode 100644 llvm/test/Transforms/SROA/tbaa-struct2.ll

diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 0b87416befe9..9a4480b75a30 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -667,6 +667,12 @@ struct AAMDNodes {
   /// The tag specifying the noalias scope.
   MDNode *NoAlias = nullptr;
 
+  // Shift tbaa Metadata node to start off bytes later
+  static MDNode *ShiftTBAA(MDNode *M, size_t off);
+
+  // Shift tbaa.struct Metadata node to start off bytes later
+  static MDNode *ShiftTBAAStruct(MDNode *M, size_t off);
+
   /// Given two sets of AAMDNodes that apply to the same pointer,
   /// give the best AAMDNodes that are compatible with both (i.e. a set of
   /// nodes whose allowable aliasing conclusions are a subset of those
@@ -680,6 +686,18 @@ struct AAMDNodes {
     Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr;
     return Result;
   }
+
+  /// Create a new AAMDNode that describes this AAMDNode after applying a
+  /// constant offset to the start of the pointer
+  AAMDNodes shift(size_t Offset) {
+    AAMDNodes Result;
+    Result.TBAA = TBAA ? ShiftTBAA(TBAA, Offset) : nullptr;
+    Result.TBAAStruct =
+        TBAAStruct ? ShiftTBAAStruct(TBAAStruct, Offset) : nullptr;
+    Result.Scope = Scope;
+    Result.NoAlias = NoAlias;
+    return Result;
+  }
 };
 
 // Specialize DenseMapInfo for AAMDNodes.
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index acfacbd6c74e..945f7e46e142 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -568,6 +568,11 @@ class GEPOperator
   bool accumulateConstantOffset(
       const DataLayout &DL, APInt &Offset,
       function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr) const;
+
+  static bool accumulateConstantOffset(
+      Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
+      APInt &Offset,
+      function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr);
 };
 
 class PtrToIntOperator
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 7d97fc5da9b0..268acb682cf1 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -737,3 +737,84 @@ bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
 void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
 }
+
+MDNode *AAMDNodes::ShiftTBAA(MDNode *MD, size_t Offset) {
+  // Fast path if there's no offset
+  if (Offset == 0)
+    return MD;
+  // Fast path if there's no path tbaa node (and thus scalar)
+  if (!isStructPathTBAA(MD))
+    return MD;
+
+  TBAAStructTagNode Tag(MD);
+  SmallVector<Metadata *, 5> Sub;
+  Sub.push_back(MD->getOperand(0));
+  Sub.push_back(MD->getOperand(1));
+  ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(2));
+
+  if (Tag.isNewFormat()) {
+    ConstantInt *InnerSize = mdconst::extract<ConstantInt>(MD->getOperand(3));
+
+    if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset) {
+      return nullptr;
+    }
+
+    uint64_t NewSize = InnerSize->getZExtValue();
+    uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
+    if (InnerOffset->getZExtValue() < Offset) {
+      NewOffset = 0;
+      NewSize -= Offset - InnerOffset->getZExtValue();
+    }
+
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerOffset->getType(), NewOffset)));
+
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerSize->getType(), NewSize)));
+
+    // immutable type
+    if (MD->getNumOperands() >= 5)
+      Sub.push_back(MD->getOperand(4));
+  } else {
+    if (InnerOffset->getZExtValue() < Offset)
+      return nullptr;
+
+    Sub.push_back(ConstantAsMetadata::get(ConstantInt::get(
+        InnerOffset->getType(), InnerOffset->getZExtValue() - Offset)));
+
+    // immutable type
+    if (MD->getNumOperands() >= 4)
+      Sub.push_back(MD->getOperand(3));
+  }
+  return MDNode::get(MD->getContext(), Sub);
+}
+
+MDNode *AAMDNodes::ShiftTBAAStruct(MDNode *MD, size_t Offset) {
+  // Fast path if there's no offset
+  if (Offset == 0)
+    return MD;
+  SmallVector<Metadata *, 3> Sub;
+  for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) {
+    ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i));
+    ConstantInt *InnerSize =
+        mdconst::extract<ConstantInt>(MD->getOperand(i + 1));
+    // Don't include any triples that aren't in bounds
+    if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset)
+      continue;
+
+    uint64_t NewSize = InnerSize->getZExtValue();
+    uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
+    if (InnerOffset->getZExtValue() < Offset) {
+      NewOffset = 0;
+      NewSize -= Offset - InnerOffset->getZExtValue();
+    }
+
+    // Shift the offset of the triple
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerOffset->getType(), NewOffset)));
+    Sub.push_back(ConstantAsMetadata::get(
+        ConstantInt::get(InnerSize->getType(), NewSize)));
+    Sub.push_back(MD->getOperand(i + 2));
+  }
+  return MDNode::get(MD->getContext(), Sub);
+}
\ No newline at end of file
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index 0f70fc37dee2..69181f35827b 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -61,10 +61,17 @@ Align GEPOperator::getMaxPreservedAlignment(const DataLayout &DL) const {
 bool GEPOperator::accumulateConstantOffset(
     const DataLayout &DL, APInt &Offset,
     function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
-   assert(Offset.getBitWidth() ==
-              DL.getIndexSizeInBits(getPointerAddressSpace()) &&
-          "The offset bit width does not match DL specification.");
+  assert(Offset.getBitWidth() ==
+             DL.getIndexSizeInBits(getPointerAddressSpace()) &&
+         "The offset bit width does not match DL specification.");
+  SmallVector<const Value *> Index(value_op_begin() + 1, value_op_end());
+  return GEPOperator::accumulateConstantOffset(getSourceElementType(), Index,
+                                               DL, Offset, ExternalAnalysis);
+}
 
+bool GEPOperator::accumulateConstantOffset(
+    Type *SourceType, ArrayRef<const Value *> Index, const DataLayout &DL,
+    APInt &Offset, function_ref<bool(Value &, APInt &)> ExternalAnalysis) {
   bool UsedExternalAnalysis = false;
   auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool {
     Index = Index.sextOrTrunc(Offset.getBitWidth());
@@ -85,9 +92,10 @@ bool GEPOperator::accumulateConstantOffset(
     }
     return true;
   };
-
-  for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
-       GTI != GTE; ++GTI) {
+  auto begin = generic_gep_type_iterator<decltype(Index.begin())>::begin(
+      SourceType, Index.begin());
+  auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
+  for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
     // Scalable vectors are multiplied by a runtime constant.
     bool ScalableType = false;
     if (isa<ScalableVectorType>(GTI.getIndexedType()))
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index d111a6ba4241..af510f1a84bf 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2524,7 +2524,7 @@ class llvm::sroa::AllocaSliceRewriter
                                               NewAI.getAlign(), LI.isVolatile(),
                                               LI.getName());
       if (AATags)
-        NewLI->setAAMetadata(AATags);
+        NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
       if (NewLI->isAtomic())
@@ -2563,7 +2563,7 @@ class llvm::sroa::AllocaSliceRewriter
           IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
                                 getSliceAlign(), LI.isVolatile(), LI.getName());
       if (AATags)
-        NewLI->setAAMetadata(AATags);
+        NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
 
@@ -2626,7 +2626,7 @@ class llvm::sroa::AllocaSliceRewriter
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
     if (AATags)
-      Store->setAAMetadata(AATags);
+      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);
 
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
@@ -2650,7 +2650,7 @@ class llvm::sroa::AllocaSliceRewriter
     Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
     if (AATags)
-      Store->setAAMetadata(AATags);
+      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
     return true;
@@ -2720,7 +2720,7 @@ class llvm::sroa::AllocaSliceRewriter
     NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
     if (AATags)
-      NewSI->setAAMetadata(AATags);
+      NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     if (SI.isVolatile())
       NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
     if (NewSI->isAtomic())
@@ -2816,7 +2816,7 @@ class llvm::sroa::AllocaSliceRewriter
           getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
           MaybeAlign(getSliceAlign()), II.isVolatile());
       if (AATags)
-        New->setAAMetadata(AATags);
+        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
       return false;
     }
@@ -2885,7 +2885,7 @@ class llvm::sroa::AllocaSliceRewriter
     StoreInst *New =
         IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
     if (AATags)
-      New->setAAMetadata(AATags);
+      New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
     return !II.isVolatile();
   }
@@ -3006,7 +3006,7 @@ class llvm::sroa::AllocaSliceRewriter
       CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
                                        Size, II.isVolatile());
       if (AATags)
-        New->setAAMetadata(AATags);
+        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
       return false;
     }
@@ -3060,7 +3060,7 @@ class llvm::sroa::AllocaSliceRewriter
       LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                              II.isVolatile(), "copyload");
       if (AATags)
-        Load->setAAMetadata(AATags);
+        Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       Src = Load;
     }
 
@@ -3080,7 +3080,7 @@ class llvm::sroa::AllocaSliceRewriter
     StoreInst *Store = cast<StoreInst>(
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
     if (AATags)
-      Store->setAAMetadata(AATags);
+      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
     return !II.isVolatile();
   }
@@ -3381,8 +3381,13 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
           IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
       LoadInst *Load =
           IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
-      if (AATags)
-        Load->setAAMetadata(AATags);
+
+      APInt Offset(
+          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+      if (AATags &&
+          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+        Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
       LLVM_DEBUG(dbgs() << "          to: " << *Load << "\n");
     }
@@ -3428,8 +3433,13 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
           IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
       StoreInst *Store =
           IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
-      if (AATags)
-        Store->setAAMetadata(AATags);
+
+      APInt Offset(
+          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
+      if (AATags &&
+          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+        Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+
       LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
     }
   };
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index fea4f995a57e..d15d01e096f6 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -145,27 +145,28 @@ entry:
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %src, i32 42, {{.*}}), !tbaa [[TAG_0:!.*]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
-; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M42:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_0_M43:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_0_M142:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0_M158:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}})
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207
-; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NOT:  %[[bad_test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa
+; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}})
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_0:!.*]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}})
 
   ; Clobber a single element of the array, this should be promotable, and be deleted.
   %c = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 42
@@ -310,7 +311,7 @@ entry:
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 3, {{.*}}), !tbaa [[TAG_51:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 3
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5, {{.*}}), !tbaa [[TAG_51]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5, {{.*}})
 
   ; Bridge between the overlapping areas
   call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i1 false), !tbaa !53
@@ -318,7 +319,7 @@ entry:
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 5, {{.*}}), !tbaa [[TAG_53:!.*]]
 ; ...promoted i8 store...
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2, {{.*}}), !tbaa [[TAG_53]]
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2, {{.*}})
 
   ; Entirely within the second overlap.
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i1 false), !tbaa !55
@@ -331,33 +332,33 @@ entry:
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_57:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3, {{.*}}), !tbaa [[TAG_57]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3, {{.*}})
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i1 false), !tbaa !59
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 42, {{.*}}), !tbaa [[TAG_59:!.*]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42
-; CHECK-NEXT: store i8 0, i8* %[[gep]], {{.*}}, !tbaa [[TAG_59]]
+; CHECK-NEXT: store i8 0, i8* %[[gep]], {{.*}}
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}})
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 142
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16, {{.*}})
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 158
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 42, {{.*}})
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 200
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}})
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 207
-; CHECK-NEXT: store i8 42, i8* %[[gep]], {{.*}}, !tbaa [[TAG_59]]
+; CHECK-NEXT: store i8 42, i8* %[[gep]], {{.*}}
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 208
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}})
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 215
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}), !tbaa [[TAG_59]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}})
 
   ret void
 }
@@ -381,41 +382,41 @@ entry:
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 20, {{.*}}), !tbaa [[TAG_0]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 20
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0_M20:!.*]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 22
-; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M22:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 23
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0_M23:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 30
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_0_M30:!.*]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 40
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0_M40:!.*]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
-; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M42]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0_M43]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 50
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0_M50:!.*]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 52
-; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M52:!.*]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 53
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0_M53:!.+]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 60
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_0]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_0_M60:!.+]]
 
   %a.src.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 20
   %a.dst.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 40
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i1 false), !tbaa !3
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_3]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}})
 
   ; Clobber a single element of the array, this should be promotable, and be deleted.
   %c = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 42
@@ -425,41 +426,41 @@ entry:
   call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i1 false), !tbaa !5
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_5]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}})
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i1 false), !tbaa !7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20, {{.*}}), !tbaa [[TAG_7]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 20
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]], {{.*}}
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 22
-; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]], {{.*}}
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 23
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}})
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 30
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}})
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 40
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42
-; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}})
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 50
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}}
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 52
-; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}, !tbaa [[TAG_7]]
+; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}}
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 53
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}})
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 60
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_7]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}})
 
   ret void
 }
@@ -856,7 +857,7 @@ define void @test18(i8* %src, i8* %dst, i32 %size) {
 ; CHECK:      %[[a:.*]] = alloca [34 x i8]
 ; CHECK:      %[[srcgep1:.*]] = getelementptr inbounds i8, i8* %src, i64 4
 ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
-; CHECK-NEXT: %[[srcload:.*]] = load i32, i32* %[[srccast1]], {{.*}}, !tbaa [[TAG_0]]
+; CHECK-NEXT: %[[srcload:.*]] = load i32, i32* %[[srccast1]], {{.*}}, !tbaa [[TAG_0_M4:!.*]]
 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[agep1]], i8* %src, i32 %size, {{.*}}), !tbaa [[TAG_3]]
 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
@@ -865,7 +866,7 @@ define void @test18(i8* %src, i8* %dst, i32 %size) {
 ; CHECK-NEXT: store i32 42, i32* %[[dstcast1]], {{.*}}, !tbaa [[TAG_9]]
 ; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8, i8* %dst, i64 4
 ; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32*
-; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]], {{.*}}, !tbaa [[TAG_9]]
+; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]], {{.*}}
 ; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* align 1 %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11]]
 ; CHECK-NEXT: ret void
@@ -1896,7 +1897,7 @@ bb7:
   ret void
 }
 
-!0 = !{!1, !1, i64 0, i64 1}
+!0 = !{!1, !1, i64 0, i64 200}
 !1 = !{!2, i64 1, !"type_0"}
 !2 = !{!"root"}
 !3 = !{!4, !4, i64 0, i64 1}
@@ -1958,14 +1959,39 @@ bb7:
 !59 = !{!60, !60, i64 0, i64 1}
 !60 = !{!2, i64 1, !"type_59"}
 
-; CHECK-DAG: [[TYPE_0:!.*]] = !{{{.*}}, !"type_0"}
-; CHECK-DAG: [[TAG_0]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 1}
+; CHECK-DAG: [[TAG_0]] = !{[[TYPE_0:!.*]], [[TYPE_0]], i64 0, i64 200}
+; CHECK-DAG: [[TYPE_0]] = !{{{.*}}, !"type_0"}
+
+; CHECK-DAG: [[TAG_0_M42]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 158}
+; CHECK-DAG: [[TAG_0_M43]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 157}
+; CHECK-DAG: [[TAG_0_M142]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 58}
+; CHECK-DAG: [[TAG_0_M158]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 42}
+
+; CHECK-DAG: [[TAG_59]] = !{[[TYPE_59:!.*]], [[TYPE_59]], i64 0, i64 1}
+; CHECK-DAG: [[TYPE_59]] = !{{{.*}}, !"type_59"}
+
+; CHECK-DAG: [[TAG_0_M20]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 180}
+; CHECK-DAG: [[TAG_0_M22]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 178}
+; CHECK-DAG: [[TAG_0_M23]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 177}
+; CHECK-DAG: [[TAG_0_M30]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 170}
+; CHECK-DAG: [[TAG_0_M40]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 160}
+; CHECK-DAG: [[TAG_0_M50]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 150}
+; CHECK-DAG: [[TAG_0_M52]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 148}
+; CHECK-DAG: [[TAG_0_M53]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 147}
+; CHECK-DAG: [[TAG_0_M60]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 140}
+
+; CHECK-DAG: [[TYPE_7:!.*]] = !{{{.*}}, !"type_7"}
+; CHECK-DAG: [[TAG_7]] = !{[[TYPE_7]], [[TYPE_7]], i64 0, i64 1}
+
 ; CHECK-DAG: [[TYPE_3:!.*]] = !{{{.*}}, !"type_3"}
 ; CHECK-DAG: [[TAG_3]] = !{[[TYPE_3]], [[TYPE_3]], i64 0, i64 1}
+
 ; CHECK-DAG: [[TYPE_5:!.*]] = !{{{.*}}, !"type_5"}
 ; CHECK-DAG: [[TAG_5]] = !{[[TYPE_5]], [[TYPE_5]], i64 0, i64 1}
-; CHECK-DAG: [[TYPE_7:!.*]] = !{{{.*}}, !"type_7"}
-; CHECK-DAG: [[TAG_7]] = !{[[TYPE_7]], [[TYPE_7]], i64 0, i64 1}
+
+; CHECK-DAG: [[TAG_0_M4]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 196}
+
+
 ; CHECK-DAG: [[TYPE_9:!.*]] = !{{{.*}}, !"type_9"}
 ; CHECK-DAG: [[TAG_9]] = !{[[TYPE_9]], [[TYPE_9]], i64 0, i64 1}
 ; CHECK-DAG: [[TYPE_11:!.*]] = !{{{.*}}, !"type_11"}
@@ -2016,5 +2042,3 @@ bb7:
 ; CHECK-DAG: [[TAG_55]] = !{[[TYPE_55]], [[TYPE_55]], i64 0, i64 1}
 ; CHECK-DAG: [[TYPE_57:!.*]] = !{{{.*}}, !"type_57"}
 ; CHECK-DAG: [[TAG_57]] = !{[[TYPE_57]], [[TYPE_57]], i64 0, i64 1}
-; CHECK-DAG: [[TYPE_59:!.*]] = !{{{.*}}, !"type_59"}
-; CHECK-DAG: [[TAG_59]] = !{[[TYPE_59]], [[TYPE_59]], i64 0, i64 1}
diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll
new file mode 100644
index 000000000000..75f72f4e9963
--- /dev/null
+++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S -sroa %s | FileCheck %s
+
+; SROA should correctly offset `!tbaa.struct` metadata
+
+%struct.Wishart = type { double, i32 }
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* writeonly, i8* readonly, i64, i1 immarg)
+declare double @subcall(double %g, i32 %m)
+
+define double @bar(%struct.Wishart* %wishart) {
+  %tmp = alloca %struct.Wishart, align 8
+  %tmpaddr = bitcast %struct.Wishart* %tmp to i8*
+  %waddr = bitcast %struct.Wishart* %wishart to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmpaddr, i8* align 8 %waddr, i64 16, i1 false), !tbaa.struct !2
+  %gamma = getelementptr inbounds %struct.Wishart, %struct.Wishart* %tmp, i32 0, i32 0
+  %lg = load double, double* %gamma, align 8, !tbaa !4
+  %m = getelementptr inbounds %struct.Wishart, %struct.Wishart* %tmp, i32 0, i32 1
+  %lm = load i32, i32* %m, align 8, !tbaa !8
+  %call = call double @subcall(double %lg, i32 %lm)
+  ret double %call
+}
+
+!2 = !{i64 0, i64 8, !3, i64 8, i64 4, !7}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"double", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"int", !5, i64 0}
+
+; CHECK: define double @bar(%struct.Wishart* %wishart) {
+; CHECK-NEXT:   %tmp.sroa.3 = alloca [4 x i8], align 4
+; CHECK-NEXT:   %tmp.sroa.0.0.waddr.sroa_idx = getelementptr inbounds %struct.Wishart, %struct.Wishart* %wishart, i64 0, i32 0
+; CHECK-NEXT:   %tmp.sroa.0.0.copyload = load double, double* %tmp.sroa.0.0.waddr.sroa_idx, align 8, !tbaa.struct !0
+; CHECK-NEXT:   %tmp.sroa.2.0.waddr.sroa_idx1 = getelementptr inbounds %struct.Wishart, %struct.Wishart* %wishart, i64 0, i32 1
+; CHECK-NEXT:   %tmp.sroa.2.0.copyload = load i32, i32* %tmp.sroa.2.0.waddr.sroa_idx1, align 8, !tbaa.struct !7
+; CHECK-NEXT:   %tmp.sroa.3.0.waddr.sroa_raw_cast = bitcast %struct.Wishart* %wishart to i8*
+; CHECK-NEXT:   %tmp.sroa.3.0.waddr.sroa_raw_idx = getelementptr inbounds i8, i8* %tmp.sroa.3.0.waddr.sroa_raw_cast, i64 12
+; CHECK-NEXT:   %tmp.sroa.3.0.tmpaddr.sroa_idx = getelementptr inbounds [4 x i8], [4 x i8]* %tmp.sroa.3, i64 0, i64 0
+; CHECK-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp.sroa.3.0.tmpaddr.sroa_idx, i8* align 4 %tmp.sroa.3.0.waddr.sroa_raw_idx, i64 4, i1 false), !tbaa.struct !8
+; CHECK-NEXT:   %call = call double @subcall(double %tmp.sroa.0.0.copyload, i32 %tmp.sroa.2.0.copyload)
+; CHECK-NEXT:   ret double %call
+; CHECK-NEXT: }
+
+; CHECK: !0 = !{i64 0, i64 8, !1, i64 8, i64 4, !5}
+; CHECK: !1 = !{!2, !2, i64 0}
+; CHECK: !2 = !{!"double", !{{[0-9]+}}, i64 0}
+
+; CHECK: !5 = !{!6, !6, i64 0}
+; CHECK: !6 = !{!"int", !{{[0-9]+}}, i64 0}
+; CHECK: !7 = !{i64 0, i64 4, !5}
+; CHECK: !8 = !{}
\ No newline at end of file

From a7629a2244a325b908ddbd4336aef25a7049bda9 Mon Sep 17 00:00:00 2001
From: Yang Fan <nullptr.cpp@gmail.com>
Date: Wed, 3 Feb 2021 11:04:58 +0800
Subject: [PATCH 136/244] [CSSPGO] Fix MSVC initializing truncation warning
 (NFC)

MSVC warning:
```
\llvm-project\llvm\include\llvm\Transforms\IPO\SampleProfileProbe.h(65): warning C4305: 'initializing': truncation from 'double' to 'const float'
```
---
 llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index cab893b50d19..0fd79d8ff7f3 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -62,7 +62,7 @@ class PseudoProbeVerifier {
 
 private:
   // Allow a little bias due the rounding to integral factors.
-  constexpr static float DistributionFactorVariance = 0.02;
+  constexpr static float DistributionFactorVariance = 0.02f;
   // Distribution factors from last pass.
   FuncProbeFactorMap FunctionProbeFactors;
 

From 78b35e278a9f62c2a6cfe3c974155a7e9bb60361 Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Mon, 11 Jan 2021 09:08:39 -0800
Subject: [PATCH 137/244] [CSSPGO][llvm-profgen] Pseudo probe based CS profile
 generation

This change implements the profile generation infrastructure for pseudo probes in llvm-profgen. During virtual unwinding, the raw profile is extracted into range counters and branch counters and aggregated into a sample counter map indexed by the call stack context. This change introduces the last step and produces the final profile. Specifically, function body samples are recorded by going through each probe in the range, and callsite target samples are recorded by extracting the callsite probe from the branch's source.

Please refer to https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s and https://reviews.llvm.org/D89707 for more context about CSSPGO and llvm-profgen.

**Implementation**

- Extended `PseudoProbeProfileGenerator` for pseudo probe based profile generation.
- `populateBodySamplesWithProbes` reading range counter is responsible for recording function body samples and inferring caller's body samples.
- `populateBoundarySamplesWithProbes` reading branch counter is responsible for recording call site target samples.
- Each sample is recorded with its calling context (named `ContextId`). Note that the probe-based context key doesn't include the leaf frame probe info, so the `ContextId` string is created from two parts: one from the concatenation of the probe stack strings and the other from the leaf frame probe.
- Added regression test

Test Plan:

ninja & ninja check-llvm

Differential Revision: https://reviews.llvm.org/D92998
---
 .../llvm-profgen/inline-cs-pseudoprobe.test   |  17 ++
 .../llvm-profgen/noinline-cs-pseudoprobe.test |  16 ++
 llvm/tools/llvm-profgen/PerfReader.cpp        |   4 -
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 198 +++++++++++++++++-
 llvm/tools/llvm-profgen/ProfileGenerator.h    |  41 +++-
 llvm/tools/llvm-profgen/ProfiledBinary.h      |  11 +-
 llvm/tools/llvm-profgen/PseudoProbe.cpp       |  36 ++--
 llvm/tools/llvm-profgen/PseudoProbe.h         |  13 +-
 8 files changed, 307 insertions(+), 29 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
index 109f2f63e86d..19928322a66d 100644
--- a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
@@ -1,4 +1,21 @@
 ; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: FileCheck %s --input-file %t
+
+; CHECK:     [main:2 @ foo]:74:0
+; CHECK-NEXT: 2: 15
+; CHECK-NEXT: 3: 15
+; CHECK-NEXT: 4: 14
+; CHECK-NEXT: 5: 1
+; CHECK-NEXT: 6: 15
+; CHECK-NEXT: 8: 14 bar:14
+; CHECK-NEXT: !CFGChecksum: 138950591924
+; CHECK-NEXT:[main:2 @ foo:8 @ bar]:56:14
+; CHECK-NEXT: 1: 14
+; CHECK-NEXT: 2: 14
+; CHECK-NEXT: 3: 14
+; CHECK-NEXT: 4: 14
+; CHECK-NEXT: !CFGChecksum: 72617220756
+
 
 ; CHECK-UNWINDER:      Binary(inline-cs-pseudoprobe.perfbin)'s Range Counter:
 ; CHECK-UNWINDER-EMPTY:
diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
index 2ac3f06587d9..0491a62ff69b 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
@@ -1,4 +1,20 @@
 ; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: FileCheck %s --input-file %t
+
+; CHECK:     [main:2 @ foo]:75:0
+; CHECK-NEXT: 2: 15
+; CHECK-NEXT: 3: 15
+; CHECK-NEXT: 4: 15
+; CHECK-NEXT: 6: 15
+; CHECK-NEXT: 8: 15 bar:15
+; CHECK-NEXT: !CFGChecksum: 138950591924
+; CHECK-NEXT:[main:2 @ foo:8 @ bar]:60:15
+; CHECK-NEXT: 1: 15
+; CHECK-NEXT: 2: 15
+; CHECK-NEXT: 3: 15
+; CHECK-NEXT: 4: 15
+; CHECK-NEXT: !CFGChecksum: 72617220756
+
 
 ; CHECK-UNWINDER:      Binary(noinline-cs-pseudoprobe.perfbin)'s Range Counter:
 ; CHECK-UNWINDER-NEXT: main:2
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index d08c15808cf4..64a502be59a9 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -567,11 +567,7 @@ void PerfReader::checkAndSetPerfType(
   }
 
   if (HasHybridPerf) {
-    // Set up ProfileIsCS to enable context-sensitive functionalities
-    // in SampleProf
-    FunctionSamples::ProfileIsCS = true;
     PerfType = PERF_LBR_STACK;
-
   } else {
     // TODO: Support other type of perf script
     PerfType = PERF_INVILID;
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 7624fd3f2808..ce228a781538 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -67,7 +67,7 @@ void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
   /*
   Regions may overlap with each other. Using the boundary info, find all
   disjoint ranges and their sample count. BoundaryPoint contains the count
-  mutiple samples begin/end at this points.
+  multiple samples begin/end at this points.
 
   |<--100-->|           Sample1
   |<------200------>|   Sample2
@@ -264,9 +264,12 @@ static FrameLocation getCallerContext(StringRef CalleeContext,
   StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
   CallerNameWithContext = CallerContext.rsplit(':').first;
   auto ContextSplit = CallerContext.rsplit(" @ ");
+  StringRef CallerFrameStr = ContextSplit.second.size() == 0
+                                 ? ContextSplit.first
+                                 : ContextSplit.second;
   FrameLocation LeafFrameLoc = {"", {0, 0}};
   StringRef Funcname;
-  SampleContext::decodeContextString(ContextSplit.second, Funcname,
+  SampleContext::decodeContextString(CallerFrameStr, Funcname,
                                      LeafFrameLoc.second);
   LeafFrameLoc.first = Funcname.str();
   return LeafFrameLoc;
@@ -316,5 +319,196 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
   }
 }
 
+// Helper function to extract context prefix
+// PrefixContextId is the context id string except for the leaf probe's
+// context, the final ContextId will be:
+// ContextId =  PrefixContextId + LeafContextId;
+// Remind that the string in ContextStrStack is in callee-caller order
+// So process the string vector reversely
+static std::string
+extractPrefixContextId(const SmallVector<const PseudoProbe *, 16> &Probes,
+                       ProfiledBinary *Binary) {
+  SmallVector<std::string, 16> ContextStrStack;
+  for (const auto *P : Probes) {
+    Binary->getInlineContextForProbe(P, ContextStrStack, true);
+  }
+  std::ostringstream OContextStr;
+  for (auto &CxtStr : ContextStrStack) {
+    if (OContextStr.str().size())
+      OContextStr << " @ ";
+    OContextStr << CxtStr;
+  }
+  return OContextStr.str();
+}
+
+void PseudoProbeCSProfileGenerator::generateProfile() {
+  // Enable CS and pseudo probe functionalities in SampleProf
+  FunctionSamples::ProfileIsCS = true;
+  FunctionSamples::ProfileIsProbeBased = true;
+  for (const auto &BI : BinarySampleCounters) {
+    ProfiledBinary *Binary = BI.first;
+    for (const auto &CI : BI.second) {
+      const ProbeBasedCtxKey *CtxKey =
+          dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
+      std::string PrefixContextId =
+          extractPrefixContextId(CtxKey->Probes, Binary);
+      // Fill in function body samples from probes, also infer caller's samples
+      // from callee's probe
+      populateBodySamplesWithProbes(CI.second.RangeCounter, PrefixContextId,
+                                    Binary);
+      // Fill in boundary samples for a call probe
+      populateBoundarySamplesWithProbes(CI.second.BranchCounter,
+                                        PrefixContextId, Binary);
+    }
+  }
+}
+
+void PseudoProbeCSProfileGenerator::extractProbesFromRange(
+    const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
+    ProfiledBinary *Binary) {
+  RangeSample Ranges;
+  findDisjointRanges(Ranges, RangeCounter);
+  for (const auto &Range : Ranges) {
+    uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
+    uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
+    uint64_t Count = Range.second;
+    // Disjoint ranges have introduce zero-filled gap that
+    // doesn't belong to current context, filter them out.
+    if (Count == 0)
+      continue;
+
+    InstructionPointer IP(Binary, RangeBegin, true);
+
+    // Disjoint ranges may have range in the middle of two instr,
+    // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
+    // can be Addr1+1 to Addr2-1. We should ignore such range.
+    if (IP.Address > RangeEnd)
+      continue;
+
+    while (IP.Address <= RangeEnd) {
+      const AddressProbesMap &Address2ProbesMap =
+          Binary->getAddress2ProbesMap();
+      auto It = Address2ProbesMap.find(IP.Address);
+      if (It != Address2ProbesMap.end()) {
+        for (const auto &Probe : It->second) {
+          if (!Probe.isBlock())
+            continue;
+          ProbeCounter[&Probe] += Count;
+        }
+      }
+
+      IP.advance();
+    }
+  }
+}
+
+void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
+    const RangeSample &RangeCounter, StringRef PrefixContextId,
+    ProfiledBinary *Binary) {
+  ProbeCounterMap ProbeCounter;
+  // Extract the top frame probes by looking up each address among the range in
+  // the Address2ProbeMap
+  extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
+  for (auto PI : ProbeCounter) {
+    const PseudoProbe *Probe = PI.first;
+    uint64_t Count = PI.second;
+    FunctionSamples &FunctionProfile =
+        getFunctionProfileForLeafProbe(PrefixContextId, Probe, Binary);
+
+    FunctionProfile.addBodySamples(Probe->Index, 0, Count);
+    FunctionProfile.addTotalSamples(Count);
+    if (Probe->isEntry()) {
+      FunctionProfile.addHeadSamples(Count);
+      // Look up for the caller's function profile
+      const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
+      if (InlinerDesc != nullptr) {
+        // Since the context id will be compressed, we have to use callee's
+        // context id to infer caller's context id to ensure they share the
+        // same context prefix.
+        StringRef CalleeContextId =
+            FunctionProfile.getContext().getNameWithContext(true);
+        StringRef CallerContextId;
+        FrameLocation &&CallerLeafFrameLoc =
+            getCallerContext(CalleeContextId, CallerContextId);
+        uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
+        assert(CallerIndex &&
+               "Inferred caller's location index shouldn't be zero!");
+        FunctionSamples &CallerProfile =
+            getFunctionProfileForContext(CallerContextId);
+        CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
+        CallerProfile.addBodySamples(CallerIndex, 0, Count);
+        CallerProfile.addTotalSamples(Count);
+        CallerProfile.addCalledTargetSamples(CallerIndex, 0,
+                                             FunctionProfile.getName(), Count);
+      }
+    }
+  }
+}
+
+void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
+    const BranchSample &BranchCounter, StringRef PrefixContextId,
+    ProfiledBinary *Binary) {
+  for (auto BI : BranchCounter) {
+    uint64_t SourceOffset = BI.first.first;
+    uint64_t TargetOffset = BI.first.second;
+    uint64_t Count = BI.second;
+    uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
+    const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
+    if (CallProbe == nullptr)
+      continue;
+    FunctionSamples &FunctionProfile =
+        getFunctionProfileForLeafProbe(PrefixContextId, CallProbe, Binary);
+    FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
+    FunctionProfile.addTotalSamples(Count);
+    StringRef CalleeName = FunctionSamples::getCanonicalFnName(
+        Binary->getFuncFromStartOffset(TargetOffset));
+    if (CalleeName.size() == 0)
+      continue;
+    FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
+                                           Count);
+  }
+}
+
+FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
+    StringRef PrefixContextId, SmallVector<std::string, 16> &LeafInlinedContext,
+    const PseudoProbeFuncDesc *LeafFuncDesc) {
+  assert(LeafInlinedContext.size() &&
+         "Profile context must have the leaf frame");
+  std::ostringstream OContextStr;
+  OContextStr << PrefixContextId.str();
+
+  for (uint32_t I = 0; I < LeafInlinedContext.size() - 1; I++) {
+    if (OContextStr.str().size())
+      OContextStr << " @ ";
+    OContextStr << LeafInlinedContext[I];
+  }
+  // For leaf inlined context with the top frame, we should strip off the top
+  // frame's probe id, like:
+  // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
+  if (OContextStr.str().size())
+    OContextStr << " @ ";
+  StringRef LeafLoc = LeafInlinedContext.back();
+  OContextStr << LeafLoc.split(":").first.str();
+
+  FunctionSamples &FunctionProile =
+      getFunctionProfileForContext(OContextStr.str());
+  FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
+  return FunctionProile;
+}
+
+FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
+    StringRef PrefixContextId, const PseudoProbe *LeafProbe,
+    ProfiledBinary *Binary) {
+  SmallVector<std::string, 16> LeafInlinedContext;
+  Binary->getInlineContextForProbe(LeafProbe, LeafInlinedContext);
+  // Note that the context from probe doesn't include leaf frame,
+  // hence we need to retrieve and append the leaf frame.
+  const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
+  LeafInlinedContext.emplace_back(FuncDesc->FuncName + ":" +
+                                  Twine(LeafProbe->Index).str());
+  return getFunctionProfileForLeafProbe(PrefixContextId, LeafInlinedContext,
+                                        FuncDesc);
+}
+
 } // end namespace sampleprof
 } // end namespace llvm
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 8040b90ea61a..29f528026a0c 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -25,7 +25,7 @@ class ProfileGenerator {
   ProfileGenerator(){};
   virtual ~ProfileGenerator() = default;
   static std::unique_ptr<ProfileGenerator>
-  create(const BinarySampleCounterMap &SampleCounters,
+  create(const BinarySampleCounterMap &BinarySampleCounters,
          enum PerfScriptType SampleType);
   virtual void generateProfile() = 0;
 
@@ -50,7 +50,6 @@ class ProfileGenerator {
   */
   void findDisjointRanges(RangeSample &DisjointRanges,
                           const RangeSample &Ranges);
-
   // Used by SampleProfileWriter
   StringMap<FunctionSamples> ProfileMap;
 };
@@ -65,6 +64,8 @@ class CSProfileGenerator : public ProfileGenerator {
 
 public:
   void generateProfile() override {
+    // Enable context-sensitive functionalities in SampleProf
+    FunctionSamples::ProfileIsCS = true;
     for (const auto &BI : BinarySampleCounters) {
       ProfiledBinary *Binary = BI.first;
       for (const auto &CI : BI.second) {
@@ -90,14 +91,16 @@ class CSProfileGenerator : public ProfileGenerator {
     populateInferredFunctionSamples();
   }
 
+protected:
+  // Lookup or create FunctionSamples for the context
+  FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
+
 private:
   // Helper function for updating body sample for a leaf location in
   // FunctionProfile
   void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
                                            const FrameLocation &LeafLoc,
                                            uint64_t Count);
-  // Lookup or create FunctionSamples for the context
-  FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
   void populateFunctionBodySamples(FunctionSamples &FunctionProfile,
                                    const RangeSample &RangeCounters,
                                    ProfiledBinary *Binary);
@@ -108,14 +111,38 @@ class CSProfileGenerator : public ProfileGenerator {
   void populateInferredFunctionSamples();
 };
 
+using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>;
+
 class PseudoProbeCSProfileGenerator : public CSProfileGenerator {
 
 public:
   PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters)
       : CSProfileGenerator(Counters) {}
-  void generateProfile() override {
-    // TODO
-  }
+  void generateProfile() override;
+
+private:
+  // Go through each address from range to extract the top frame probe by
+  // looking up in the Address2ProbeMap
+  void extractProbesFromRange(const RangeSample &RangeCounter,
+                              ProbeCounterMap &ProbeCounter,
+                              ProfiledBinary *Binary);
+  // Fill in function body samples from probes
+  void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
+                                     StringRef PrefixContextId,
+                                     ProfiledBinary *Binary);
+  // Fill in boundary samples for a call probe
+  void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
+                                         StringRef PrefixContextId,
+                                         ProfiledBinary *Binary);
+  // Helper function to get FunctionSamples for the leaf inlined context
+  FunctionSamples &getFunctionProfileForLeafProbe(
+      StringRef PrefixContextId,
+      SmallVector<std::string, 16> &LeafInlinedContext,
+      const PseudoProbeFuncDesc *LeafFuncDesc);
+  // Helper function to get FunctionSamples for the leaf probe
+  FunctionSamples &getFunctionProfileForLeafProbe(StringRef PrefixContextId,
+                                                  const PseudoProbe *LeafProbe,
+                                                  ProfiledBinary *Binary);
 };
 
 } // end namespace sampleprof
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index bb028da2b484..40aee39677e5 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -244,10 +244,19 @@ class ProfiledBinary {
   void
   getInlineContextForProbe(const PseudoProbe *Probe,
                            SmallVector<std::string, 16> &InlineContextStack,
-                           bool IncludeLeaf) const {
+                           bool IncludeLeaf = false) const {
     return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack,
                                                  IncludeLeaf);
   }
+  const AddressProbesMap &getAddress2ProbesMap() const {
+    return ProbeDecoder.getAddress2ProbesMap();
+  }
+  const PseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
+    return ProbeDecoder.getFuncDescForGUID(GUID);
+  }
+  const PseudoProbeFuncDesc *getInlinerDescForProbe(const PseudoProbe *Probe) {
+    return ProbeDecoder.getInlinerDescForProbe(Probe);
+  }
 };
 
 } // end namespace sampleprof
diff --git a/llvm/tools/llvm-profgen/PseudoProbe.cpp b/llvm/tools/llvm-profgen/PseudoProbe.cpp
index 0b53f1aa02e7..700984e2184a 100644
--- a/llvm/tools/llvm-profgen/PseudoProbe.cpp
+++ b/llvm/tools/llvm-profgen/PseudoProbe.cpp
@@ -41,7 +41,7 @@ void PseudoProbe::getInlineContext(SmallVector<std::string, 16> &ContextStack,
   PseudoProbeInlineTree *Cur = InlineTree;
   // It will add the string of each node's inline site during iteration.
   // Note that it won't include the probe's belonging function(leaf location)
-  while (!Cur->hasInlineSite()) {
+  while (Cur->hasInlineSite()) {
     std::string ContextStr;
     if (ShowName) {
       StringRef FuncName =
@@ -312,22 +312,32 @@ PseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
   return CallProbe;
 }
 
+const PseudoProbeFuncDesc *
+PseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const {
+  auto It = GUID2FuncDescMap.find(GUID);
+  assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist");
+  return &It->second;
+}
+
 void PseudoProbeDecoder::getInlineContextForProbe(
     const PseudoProbe *Probe, SmallVector<std::string, 16> &InlineContextStack,
     bool IncludeLeaf) const {
-  if (IncludeLeaf) {
-    // Note that the context from probe doesn't include leaf frame,
-    // hence we need to retrieve and prepend leaf if requested.
-    auto It = GUID2FuncDescMap.find(Probe->GUID);
-    assert(It != GUID2FuncDescMap.end() &&
-           "Should have function descriptor for a valid GUID");
-    StringRef FuncName = It->second.FuncName;
-    // InlineContextStack is in callee-caller order, so push leaf in the front
-    InlineContextStack.emplace_back(FuncName.str() + ":" +
-                                    Twine(Probe->Index).str());
-  }
-
   Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true);
+  if (!IncludeLeaf)
+    return;
+  // Note that the context from probe doesn't include leaf frame,
+  // hence we need to retrieve and prepend leaf if requested.
+  const auto *FuncDesc = getFuncDescForGUID(Probe->GUID);
+  InlineContextStack.emplace_back(FuncDesc->FuncName + ":" +
+                                  Twine(Probe->Index).str());
+}
+
+const PseudoProbeFuncDesc *
+PseudoProbeDecoder::getInlinerDescForProbe(const PseudoProbe *Probe) const {
+  PseudoProbeInlineTree *InlinerNode = Probe->InlineTree;
+  if (!InlinerNode->hasInlineSite())
+    return nullptr;
+  return getFuncDescForGUID(std::get<0>(InlinerNode->ISite));
 }
 
 } // end namespace sampleprof
diff --git a/llvm/tools/llvm-profgen/PseudoProbe.h b/llvm/tools/llvm-profgen/PseudoProbe.h
index 25769cad8805..a6647eb39c7a 100644
--- a/llvm/tools/llvm-profgen/PseudoProbe.h
+++ b/llvm/tools/llvm-profgen/PseudoProbe.h
@@ -73,7 +73,7 @@ class PseudoProbeInlineTree {
 
   void addProbes(PseudoProbe *Probe) { ProbeVector.push_back(Probe); }
   // Return false if it's a dummy inline site
-  bool hasInlineSite() const { return !std::get<0>(ISite); }
+  bool hasInlineSite() const { return std::get<0>(ISite) != 0; }
 };
 
 // Function descriptor decoded from .pseudo_probe_desc section
@@ -203,17 +203,26 @@ class PseudoProbeDecoder {
   // Look up the probe of a call for the input address
   const PseudoProbe *getCallProbeForAddr(uint64_t Address) const;
 
+  const PseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
+
   // Helper function to populate one probe's inline stack into
   // \p InlineContextStack.
   // Current leaf location info will be added if IncludeLeaf is true
   // Example:
   //  Current probe(bar:3) inlined at foo:2 then inlined at main:1
   //  IncludeLeaf = true,  Output: [main:1, foo:2, bar:3]
-  //  IncludeLeaf = false, OUtput: [main:1, foo:2]
+  //  IncludeLeaf = false, Output: [main:1, foo:2]
   void
   getInlineContextForProbe(const PseudoProbe *Probe,
                            SmallVector<std::string, 16> &InlineContextStack,
                            bool IncludeLeaf) const;
+
+  const AddressProbesMap &getAddress2ProbesMap() const {
+    return Address2ProbesMap;
+  }
+
+  const PseudoProbeFuncDesc *
+  getInlinerDescForProbe(const PseudoProbe *Probe) const;
 };
 
 } // end namespace sampleprof

From 6209b0756d5df805f6279d3dadc8d2ba8648c3eb Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Fri, 29 Jan 2021 15:00:08 -0800
Subject: [PATCH 138/244] [CSSPGO][llvm-profgen] Compress recursive cycles in
 calling context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change compresses the context string by removing cycles caused by recursive functions during CS profile generation. Removing recursion cycles normalizes the calling context, which is better for sample aggregation and also makes context promotion deterministic.
In the implementation, we recognize adjacent repeated frames as cycles and deduplicate them through multiple rounds of iteration.
For example:
Consider an input context string stack:
[“a”, “a”, “b”, “c”, “a”, “b”, “c”, “b”, “c”, “d”]
In the first iteration, all adjacent repeated frames of size 1 are removed:
[“a”, “b”, “c”, “a”, “b”, “c”, “b”, “c”, “d”]
In the second iteration, all adjacent repeated frames of size 2 are removed:
[“a”, “b”, “c”, “a”, “b”, “c”, “d”]
Finally, the third iteration removes the adjacent repeated frames of size 3, so in the end we get the compressed output:
[“a”, “b”, “c”, “d”]

Compression is applied in two places: once to the sample's context key right after unwinding, and once to the final context string id in the ProfileGenerator.
Added a switch `compress-recursion` to control the maximum size of duplicated frames; the default of -1 means no size limit.
Added unit tests and regression tests for this.

Differential Revision: https://reviews.llvm.org/D93556
---
 .../recursion-compression-noprobe.perfbin     | Bin 0 -> 15352 bytes
 .../recursion-compression-noprobe.perfscript  |   4 +
 .../recursion-compression-pseudoprobe.perfbin | Bin 0 -> 13584 bytes
 ...cursion-compression-pseudoprobe.perfscript |  23 +++
 .../recursion-compression-noprobe.test        |  65 +++++++
 .../recursion-compression-pseudoprobe.test    | 169 ++++++++++++++++++
 llvm/tools/llvm-profgen/PerfReader.cpp        |   3 +
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  |  88 ++++-----
 llvm/tools/llvm-profgen/ProfileGenerator.h    | 138 ++++++++++++--
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    |  20 ++-
 llvm/tools/llvm-profgen/ProfiledBinary.h      |   2 +-
 llvm/tools/llvm-profgen/PseudoProbe.cpp       |   4 +-
 llvm/tools/llvm-profgen/PseudoProbe.h         |   4 +-
 llvm/unittests/tools/CMakeLists.txt           |   2 +-
 .../tools/llvm-profgen/CMakeLists.txt         |  11 ++
 .../llvm-profgen/ContextCompressionTest.cpp   |  36 ++++
 16 files changed, 498 insertions(+), 71 deletions(-)
 create mode 100755 llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin
 create mode 100644 llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfscript
 create mode 100755 llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfbin
 create mode 100644 llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfscript
 create mode 100644 llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
 create mode 100644 llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
 create mode 100644 llvm/unittests/tools/llvm-profgen/CMakeLists.txt
 create mode 100644 llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp

diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfbin
new file mode 100755
index 0000000000000000000000000000000000000000..e4e698e910997a28befe523be7293cbe5193fcb9
GIT binary patch
literal 15352
zcmeHOeQ+Dcb>9OB5F{lK)JH^GwFOzyL?0nYP?SX3QUp*U2|CnAB+8;{OD7OKkg!02
zgAZBqxRq+k?!d8Zwu#eBXEJuD<Br_%A8w~fJX3ojRf;4xp4Lv?X*`ZwS+P^9QpR%J
zD6}05eQ)nQ;O=m6Jnl4cXS~D1+x@+defxHA?{;r_v}d5#p(uirLws3aX@@H$A^lf~
znGRVbtV=YAkXR~~2sa=#aD^lXsh(px=Fk~TJtXG@ih3uBf6@_x5|%?q#P7-9GGz~u
zx`?Nf5S4U#pk6OB6~Zq8)I(pd&6Y)=2V{n6C;8Dul=ZNPdcrHkfgaE^L@$zKOu2p7
zHoPv7y$g;Il&~B^LSB`rRd1T~rX3*^7fk)KRMx{fFztFfH}Yxf+d+C~$$_(;khI5i
zP?m&X%JqE}^kiI!|C@-91a?w=)%w9l?Pf~h6>Z7H@Y>F{<Vb5Wk;)%#J>0dnb!}%L
zn-1s$6aB(EaBTE#+buxv6k^SAVPZboLp~-PgH)Z&^H@e3jH4VT_5*b=VVN%pe8Qi0
zkn`KfHUKuq21m>e9{}8MBY#&FTtNI{7_PNqB9lnv;zCssQnR^eCZ~=^6DiR*(BHjT
z?Fe)P)(EwKaH~3^Wwg;mHm79<w{A|RQ`%s3IH`f==y*CsnyM~pD=7VQm}03w)G%&5
z9`ZLO>M+_%sVx^956B$$1<G-nHsIU`$h&61v5z^;7;xi!(d;`QMQY*K*vIcR-ts|Y
z@~S8DHP>tPei3=_RL)U+IWl?HbB0YyT3quZU>BFY3crnuLXgAq81gP(EEbDsC;0RJ
z!YlhDg-emix329N>_2~M3ht`N`O|($&hMxNz4`Zm-hWS_{F!*5anW&UekyWe<DUQ{
za>Dg6lJzsW`QXx8%!5mH#fy!Lj-likq97lY@@sw{$%-41(>I)v!nMfh=}i&kT;!D-
zxp`n<HW}~~FUA`eZ7J(x{l_-809)ke?v51Rp1u!San4l>h_dg@6x0LKWd~9I%ERSy
zVWynNcofdRGCf(WKl-c5^To@J;HZ%emL`ji`xlq7snGTFN`5W~pXgI2XB<c0`9Ah~
zVdf0#KLM_kWu+&(!*}lb8|4KQ`g`oZD+Xg&e^@y;`2%J0j1qkDk3MSpq7b<qh3V_3
zmHf{NKSutEND)iTg$tAKJC45l92&avkL5}xPu(aN8W)+`GrbA?x$^DrKL_2orEooX
z>aOtqeV0G@$mnE+f6FFM8QL&a;a}tNqu5~S<E6rDC&I;(O1`CV8iVl*-Mt@{J(`+4
zZSn8IE0-r{Fy1rH`@a#NqW0Bc`<#F9?v)oz4T?`0`)4|RqgcFr4>YRWr0G@A9KB`G
z^&x@%9p2Ml__y$#{e|n{-QmJV;lY6u%l^*g7rP@TT9NJ=Sn)RAJJa9$b+H&Z{gE@b
zDEK;!&w;`R1BL5b3h#uA^L`S6eu=DqBmZA`U){AYe0O+Xcz;+uGgWS&ydNMSNag)V
zcc5arTZPEx$Nh<{f7wVq%o7Wpx9<QQm`FqtT=)jiM}Q`QJ`eO5(EFiT&jWo1=*vK_
z0(}eUBM<@~41&8M3LAjF1oSx2dBDS@+_zIGhkeSzS+$-iz;WKfcavtY+lB&LYJG^c
z!_^5G&oxVt1=q_T7K?tM!t3kxHs8_MaIfc>*mTqFD_1YQ6=BpxKc55L#Rxz`>=<0H
z0u}^{X*pcjE&+aj6KUA%>-IK>z5dCXN1YEj91nY&yS=`!7oOuNdkU@`$i4)$LiTJ~
z7Ja-5S2M&xL)l)h?=eS@xA{?LkJo>qrpLSd5m&_9{*XK3?V7CZ^M+F1uCTW~>|NgN
z^@9gcX*X1gK6gTVLSTnufaX3=pGV;H2z+)T(C!ZD&JcxLRwkyisO+KjA1#-}x8RwF
zsYU+cI_iMTEu=C`J2KrwbcXo6j$?ie@jcFv%>D94F^!onD)ZbiO=VtV3{e@Tdztb?
z8;J6I{(LGkDZMMwdmq2^u{_W7_&$hfflkD=nvl#fK1F42#{rVVXAUN|2h*Qy&jiWC
z<3yGfT})h~@_ynI{JSH(p0#+;OZ|^<-{#G?`Ip10X@$QlusRU*ceDrB2ZJ3!|MH#M
zh(8j|>3Hj!6=vB^DcjNBzP_W&C<}ONaD|8^qp4B<AuW?lq*MN2N1#2>?q8nGj@{PQ
zHk!x<4kac6@n}pNPNxqBV(Ia=k!UW8oQyV+&bFnq;~JE;!>~R{MU!pG<e~A_iA?$)
zEtd1ITN7Oq53cU&SQG7B+c_Ky#)pH$@ip;vT5x@=qkUbpGZtIDHV7UIfny4vU%24t
z3olhxkFD=m4MpAqVOk*<`-F1YPbjB-Lb=bFe|S%r;uj5;Vt16rk(~_lm{!DhLXQXY
znJ&V<!ld_?E0a&95-^fRV=>DB6~ZbTO{LPqiIlA@&n>#$m3`ayh?PB1jayd6^{tds
zNTUf`1*_=TK@|pcc1(!5Cf>?eT!m4YgHZ{?TtFarPNjN!x9=S6@7*hksP+Yu?kc$?
z9~x4_yLR<})b?$=fN$}q7UwvgH+7qfG2B*L>G4EPjc20cnmUn&EdVV8GCbdtFK*mx
zXacke4PcTnUaYq-qeYX6(G<ef`sx_<_k%>O@&CN9e*1mJ;|;fY`@$=2$D7WxA8&T|
zF841}I~jh5xaDK!0DiA{(i6gAg||n=Trx%aYw4Y(+StSG6-<v4{aueeyf6O`<_S6f
ze42Rz6@T0FM3u2;YQx8kz5lI##aQ`&@a*LA%hc|kU>;LD8NTn9e9ZmF_uW%;&)_zT
z{*LZ}*#e&r4<Ui{7+fK`1AhWM2NxCtL)r6f_?0i)PeZ0=5PqU0EF2S*!_5q3&mW@X
z68L!-u9}tb^A{}X`XQ8*I^5t;+!srEQTtDjch$IlgiwuvE%U<hEE$lTJxjqm559GO
z2{V5RnRSq<!+L*8f;-UAoO<UQK+%mlbl+VYf?pw8;ZhprIq}(tt%0W<wkJqz(eSTO
zQW}+6uOanerL$fyWyNgV-^GM`FUlUQLiqv;)uCe(#C!_j1x~NBUwXL&SS2t2m}HKk
z40w4zFiT!OMa(EN!OOkCL@#CgUL@v5WV-7cOImLdV;N~ZL0V9x<|&?#EP!H@vdlZz
z<@EX%Ec7-hbKrf$5dd(GJJkGV-7P_Qzj3;RV*zGr3J)kEIQQ}HmL{<13MfwRZ4I|K
ze6gi}?xq$$tj^pnp+o&3UAq(tUv6js+|x-G>TXtobJ6%v!`_BHz^T_wqP+&Tr`wv<
zX|Ofh;1buc36`f`Fx-r;HM+ntXp#zsYJ?whilThzos9$_>|I49Qt&-V3&fxdsjf7r
z=et_!J&Ro)cfjS|;A(WYx$1!mw`Y^fQ$N?`hJwfaBG5aaefa<sob?-^7<V2DxD>Z@
z_K<7dZ7$)t2X@ju|LJPD-Q@%o?~uzG2Zc0P1%-gyd1we#8{$%x>>zUk*l~LnxPR_y
zS?%%|op9a<M%>Q10kHl8TEAFf9jzIyqroBevTS{s(YnXI8mk3wz5+CEkWRb*qpA}_
zmmReb&(Fq6uGE)pW9f0N?cSu8XxlT;IvC9!l#4@n3y+U#sWw<jPGqz!t~%RtS~k~e
z07N_rV<IQw!y=wei>ycp`E3l}i!t$jh9dNDh&n2N+bMr1Q4rZn*}KaSXQKqwi1(D;
zPf(#m9^f1fR7i^0uV>1?$0*C9PSK=1@pS_KlBgo?e2=pwQ|urygkb4sSX5MqTq4%h
zl1N4SS5pOj<%-Tx_VqGUs+2M?eaQ4^Nd)I+Q(}WVN+K2R{VlPcFNsvRXEJG9zzP4P
zBUHh2!b9<bE$%FRmp3XoVY=b{Ei>*a?Q@xNx8VIPGhQorAIyw<gx&s;6TX1(zLQzL
zUhw{v8E+`<bD8m3g5TZE_-xTm?-FL*TUzg%@y61=p&9pylclFknFAwOoVCU0h~~Ob
zS&(v0SZVV<q><@_uQ$B^XvXK4&u<tQPFN}PcV`0(uQRY4=sRG9WZnt8eSTZKMJ%<&
zc~{hI$|)9#<-RA)Ae5Y?S;owAikr;<m7L<1DtKuHWa2x8-&Do|-ZwSk-J+F&FA~Dx
z6}<Pt{H;~+DB-+k#`5_p_=zg`Usb`+RKfB63;nd#`>QJWENrli{4&CM9|P4fZ6uu6
zyE-ZazJ1x*$ydQ2uY!NK3NB|(8$Um+BEJaU-|hU_PzB#dxV62<3AeWQX~6Bq$#+Pe
z*WetV7YXNe3gd4QZtbu4tKjpU_U-DZg72z=r>o$PRl!eI!GA!wwY_f<{-9+ZQ)+bo
zt+=0XYkW2WZWreW;0`#9Hf5QI?;!k?1>Z-waECbbl4b!{;A;-gS2&z7Js|PQ`SWSA
zQ~G|Si;M3_`O10tRlx21uhy<Nq@6k9?)nf}E)zRDS3n+jTCMZ^uce*JdEVvH<*m<)
zW{FomcUA$e*v$W7z~^B6seh$9nEonc{8)u0JtOhT=fybL3Asa3V=?JX0Pch53-=3~
z`wGfCDxatKN;@o7reCXq|25gSKEJ*PxWBAk%AEte$>9^vTE_FgAP%1Lb6j=@Xk7vv
z<HPrbMwWg-xN)V9tm}juw+Ij)gaPS;@wwdcJbfSKVLs&f3W_p4OLllZX(PM|7O!Z>
z8qb>nM?Z~gbyT?(@FqudMZE#Q{bbWfu1bGw-j~|)*$f_9Y=f^7ZE#vqUEA53(MHCi
zIsF7<TUT2kQ2ql+INlh`<br{;2+&swHIahPU@>c1)lwr?EHIYNSuvEe!vlF(^d8ib
zM{E_UF$40XV1B|@sR}Qk9ZuN0WoCeSE)i2RS~49ow%3fC+im8V+iB)i#=*?DiHBK0
z8mNqmS*S8TX1+86P$p-Lp;^WfFEdjbFvm?DN$Nq1<}^Hyi9wu5Xu2!qj;bbx<48-!
zTjelI?<y+*STXj>7&>AnYwiQHd^Mg;>@A9waW4y0#=Ok4k9V1GiFKJ_7iayfY%H7C
z!y>z}90EE@TVk8yV_?Q1&=q}CHZ9qx3)4V0mygHcRwy6ARdeI;fe{YjX5j$tNLn3D
zriY_RbtIS0WYuW?u)tl6q?Xf00_!@~ciIc7Sce+TWTHo)nYqjnp??q@$&Zg80Tn|@
zw$vnA+Fw!C-kssCJ!;RkEh?P<HA{{Nb<5su;jR6f%^W>ustV@_)Sd`gh-}#@)V_i3
z-QfXsdvEWqo<Vgm+&$2PR^{<zj4As3u?Lc2vqshMZ~&cc4uwodnXLz%t!&iPPcuWU
z=A+H(NH(pGMN=bq?74qCNR1>?YCfw$E6Wbz{Uf)FVDqTTy$0gL*(}u}ca&5W4pnmq
z@wB!55$Ztp$apR~EMVMZbUMa4`3)E5;}m>c4vePq0odT0&@wrg*CiD0?r+USM}_2#
z;iu`q$dMG-(`hcFbLaqush|Q6X5%O-P;Md#Gr4SOAO~kIL_l^+Ad{AX4`^dFc*aIR
zmrJ^aK8kb=&V%9Tcmkenqxj&0pMb1DHwK{+c;aIm#;ESsa6XY72}E<bOky|>n|9Cy
zZ8$#)LoW``OFbVQ9?ocoxP<$<oQH^U7M?K1=l7@0<GCkzMZwP{GV#6<|33@w6B1j>
zavtdtxN6~IecoT<{}=HC&zL;M0_4}iWz?T`gaE<+6Op9iU4`GzbgmE)$l$vZ*U$S^
z9$ddb`jSpcXAQW1wzCEJ*WoM^>vuUrQlpFXx&83kBopgXQEY<@?Dd!G=l!oC(w`zh
zY$L{*_4%9??f{}F*U$T8)1)u=8-ULVZ5R`7r{dWx*601S{nWr&B+UM^9Mglq$Fp8s
z=6$yb()W|T)qjG;eUL#>*601XDbhbcf?U6~{l`hao8+$5gk<g->2sV=AJ4Q|^&bTc
zV^Xs33PB0v=!Kfk^+^zZ$SfB{0dhwDcG7PreSb+(&-kd!6wfjn_4&Ph2kG}vKe8Ft
z<MN+d^!XgYx2U1~-I(RA{y$0j9DiQNoFsi~`#FAW@1#Ya_bvJVsPKcKF<I;XE-<nE
zTtDw`HrMG|5o8*Ywf<AU^TNep;&m<mUzJs!%S>MaT|Aq>`n-;wp}?^mh{(icrauN0
z#}4b4|1VCLzexV$_{YS0+>dVph9a!b`>z+#DM)_#TmE5vrauFgQNOfLOBMVk{dX+-
zydL(GzO}!wE@S#76wnv;pVya5KcW5ypo=!!;W8%H|92w@d9I(S6EYlI6y<&78alW&
zi^dU~v9_lL^w}26AobB!bqx<pVp?T!pVW$>=scSE|C#idSY`2JkrhMPpOSve!n72{
zCepXYua~yMFLA|0S)b`0q;C-u%W2E}A6U?$&+^E&gh;Zk`E`l0dY#@1WC$)Ew>&Rn
l_wd-!CH3E-RXUCE#7s<!JMw&<o&J+}#sboAi-M&n{s*)#`Lh53

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfscript
new file mode 100644
index 000000000000..3ec8f44cfef0
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-noprobe.perfscript
@@ -0,0 +1,4 @@
+PERF_RECORD_MMAP2 3019402/3019402: [0x400000(0x1000) @ 0 00:1d 265650677 1451231]: r-xp recursion-compression-noprobe.perfbin
+
+	          4007e1
+ 0x4007d6/0x4007e1/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x4007c7/0x4007c0/P/-/-/0  0x400795/0x4007b0/P/-/-/0  0x40079c/0x400790/P/-/-/0  0x400801/0x400770/P/-/-/0  0x400698/0x400801/P/-/-/0  0x400673/0x400696/P/-/-/0
diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfbin
new file mode 100755
index 0000000000000000000000000000000000000000..a3dbda2f0b3ecf6905368f3fe3b81f9c34adba7c
GIT binary patch
literal 13584
zcmeHNdvF`adEYzWK#HV5>P<#<bxx(2lueMJNJ*4pTcAV{97B)jt;W(Bj{^w{1ZX@|
z%A<@~(N2e{m8w&>NjgpJKaz>YX<B#ErqkLP$)(25<TY{Xw65JYY9qz;qY0hFs_MjI
zzi;pRfWrZ2$<8F{A6*Ie?f$;U?tc5--tKZA7>sOJ6-97T#T^1mPb}9Zv_(FJIJ1^A
zK(>gDq7j~JMZ54p*())NzeVUW!*o=&aHjQEQFwr&-dW<GRdvZ>`Wvz&1XFGyQq(Ia
zow64FEH(%dQ_*6TQBQ;46U68KF+D@=U~1Pxl`RV_J6k$*nPGaK8phPA2adARW=l^H
zZ6~;cDC=P}u)f_?-)^dp=^3hzDf@HJ<w8P^!}Z!p{U<YGH|aH@b09TodIgQhk`PSU
z_a6d1GDQ+MMo=Gr7s%fiXxy3VWQHl%*OgAi*7tU$6P@W)rZ~|#v2lIp`rbf38|abt
zaG0@ALp%3N3m9Kig)F;0P-k%s<`Tg48u<HN@H<@aLx9(^kF8VWKZ_b5N1-Z;?@Pfb
zsv-p#Cu}GB8|nKPB#JnX1^Nqz68ny_wG#S9Cc^-)#*KU-nkyJ%(Nsnl#>iMULx>@a
z;e9)dgqbr(Qu%_J+qYwDI-4=~MPq5x;??Ag=TezMQlM=dp!H%XGCZ)==n3=$`nc+g
zYjv6BJ~2LP!|mfYXTwYF2c@evoMQ!T>GYn}L7qYzVepCFsu20&n4Ze(YZ48>Tj|*x
z1{r*(KD$VW1kmS!o&@>`4B{g|{~G91KraEk1avh7N)tFb0kjwB^FU7mT?#x*%KdwU
zGSQ-}Y^tlrcNTG+(>8+LE)>{SCx;#JUij?>+}_^RfZ@A$6>vEhhWsrbRktqm?Er(J
zCSS`iP~f9n0e*8JcMDL#atBE6uCm;p0UPc0+vT=Sc|Y#C&k_P0?OlLh3-ohym0Z3g
zw+(o&!>=9WOqAR1Z}|gt(BJxT&!AsF?H%;DKdgoQ-5>RZ{Trw1hWuMH{*57jcgWv9
z;MWKItpom+0e`*p3;J(2*s=E?P1@5>nt$`H;C~HIKX)`d{h#5fOLM#TMNVtWVNk=T
z7d{FG!l!RP1Y~}h3NHQW8I+&-MmYg{5%ev5T5CYkf3>hI{QKGuVAR47oi8kyzu3HL
z66>2G3iAK)TPWZ6B9e}4;is>8!qao%r)TdBE6;?VyH;2V2EIrJ>gO*en^%ETSRd=3
zy!|VnD~e0@ho>*k-akJ-|BUtpK$IghXFv|5lY!<{4`Q_f<zM?)xxJ_|HQ#u0!E-;F
zodeAyGgHsb&*EEi1XQ;O<WFCnnpaQy&nm^Hsq^#G&(6MV3CjGFG7pWi?4x|>z}_dp
zMFRDwzc>4LAQ4iYnVM04_h*YBc8af^6PhlzO<xI~pT*ezaOfyjHVG=SK8OB^chdg`
z=mgJ83$jz#ztF+q>3<F#7@oco+8>&JI|L)R<|%j!_J>b*BHbJ5xE!8(y?%BGl*3QI
z?J2AZzC`s$rr(N8U)eVOqtN`)e+@&M!~OqOd^J3Eq5i{1LU)IbgpP)cnKPCCoB=af
zFWQ8$dILfq2V0&0HtNlkd9nn;wlv@O>dg>MYhH&Z`fGj-mPb+Im5b2vdsgUD`V3KU
zmrOXHV_IgB(i7(+OcP#RQhawY-6Nku(C1sUQh5c@7f7CGFXnrQUr+NL&Lfz(9{w)O
zs=CA*<x`aG^SkzQkHv>+P^Kc$H?PfSF?*KyUcxU>xsA%<4qfI}Qknb3-zr`&$vF&U
znBPS5v#q+!Eu^xy%_>98%lap%zH3x|kjgLwlFxo8^><BpedSbKPwg1mx^<J@4y%g}
zePf_E5Y&6Rd%C;(dp7Frd(4C$jutGuGuR>G>1bv|KW^soscc3M_5`{E-FkaIKf0-_
zYa~?&98Zl0lF_&s%Vv)S;@PpTM6?h^PR<<9=DV`_F%!z>1blHb(R5cjeSEBQJeR%K
zj2H9`ebK&TaNWk9zG(0I-dH@Cj0I!KzT^fo*dOod-Vp7L$Jeb7_H~FzDs#-tZPFu=
zZNIiQRCsNj^jfe#80-n!y#{zEv{wj>J^3+&!5j*$R@RO7_pAf#GVMi}{=#w;YnO>u
z*^A#`z3*1;diCaSu-<nocfESe8?5);$~o%AcfMkLqruWX^LsSLwPcvbv}(M@t@bmY
z=_;sCCM%w_TrrbL!J9b}kHgDq4@V)Kve8T?8%t$s%3}RxyK9Gb9TaN^0pPh77v4jm
zwJORqOF?)@>{eK6$B!8>LF7k;fHi}aEctlSfVaE=Z#hgGI6?AUNcOhx+OuzX`(ZJU
zO3O)=?8+7Pp+iP!@7_U>+O=~p@Ez@`!*2!8_m<wx=x(R2>{zN`By-U*(-_afO5V(Y
z4A0Zl79T9r#3Df9TWTfQhnubb<;-Y0HIhMw+gM$6ywgtq|NmJ1>c@)X4UdB$erv{?
z#k;j;yw!SldCq29ONPJw*J!?|CBy51Ci))Nl9{8J`V(4<xy9bNV_y1o#~!|yzl3>0
z&Oh&Eo<PN4^*rGcdlh|nr`Y>N^=riwuMe2MR~e3L&pX5}$MqtT>HF4AJD<3c{N^Js
z?wQ4JrkadR|KK}cni^U)bJMN6nsFJeCZm7;mDZOc|8Vv0@I(FIL@7@)n|<rMr(a&5
z_~VygWh}oGUU{$sOiaA|SHynK;!KXoD}JKvf0RqSZvjEh^3vTBw3NzEow6Sw3W}WV
zDM7pCQ34e4xD1mbc3YY9^KQ!i9#KUcEi<j$K>}FR1D~Ns?<MddqKd$8U`2YIO3xCl
zA)+i&*7%ob9)HV<mHtJ_V)%xt0RR{KwzPg?pv~jo)VR6vmbT#~cea7MHKDGspker!
zd#6t+BDiD`OrM%fCkQNGFGymh4rCNdrv76fuD1T+Kr3wf0*Ynl&NdwerlD7q%{H!v
z!X1r`fH&F9vH&(@;Z4%!V&9?0!;J?UzdQiV_CuAeSfxlngRFg9y@GRHQ)#w23#G?s
zw){HeRW9IID+fM@PwxUym)lHeCl$SGA^QX=ddHyf_f!z-dCJ{_(g)xnlm*g646@RM
zEK03^VH0TYGMmumDA$k52VRAz?+ftwF=oC6kGJ7hhqczxOkS_-SMyL_fbCS8l?L2K
zD=k=U1MY;atUwnE&D+#XY8Z{m0o<Z0%TabAR<(tgs}Ww|St0aBzj9R8?E@LC+p2uX
zQMpJ)1-yT7Y?~LbY=R#gRhxmh$O!og{@>UZCS#&&G&^Q?{Z86Ubsda!?u+J+$vq}4
zK9eJ6rVDnY<2f^ro7b*_nJ;wOfX?x}SxjU>G-e9)mRIQTR21cB{)I@uL%2j#%Do2w
z2Bs8Q%e4B2yELEggIZfaTLz=?my?r+1DfLdZLPUeYXIPH0RXn*JFJOOkRFlJ^}a5k
z0j(J*N<GU`MXl{NZ7Cq<7&)$qzmPI$y5UW&&R5VHeVa8uSoI{ihHg!{tcj<}f<JT<
zd_}E;uJK$8dGX&Apz5UXOfJw)sp@3Cce2rUs(~lz2BA!T{sGUVrcHY5Pig97nr~8g
zC)GO6^8^p(CeJ;fg5W9DJE?glRo^KOWOy0}mrlMdR9o$nqPkk?<XR6L510!C?}=E(
zTDMo@Ye=5gYb@XIf`7;bzt;t~W_5pQPtW!rb&-G41^>1S{tDr|zmP?E$iM_YIQw;z
zbWk<k?}8t8!S8p$@kko|@2vNn3;ul<-1@UcwVj*g3dt{={S6cDtoI+`L!hc<d0yv@
zfxK6|Ay*9SGo%xMD`JJ<^F`)81h@+Q;x#wpe@eK{lQ}WINccStd`8-D6YX?<DZ39k
z-;wxY!RLl-|9{91pSLou!OX9~j+)OQ8UGS=P*EGjRj<yFq^m)`R)0J33j+P(JpwC*
z2<NpR<A+`F`&{tTF8Jdv_}5+VmtF8W`Jsn6p^p={Q+L5{C*0ZJy9xiU!_Em8{F5&B
z-QwzRKwg0pjzgNxO_cOLIbPNAwip9dZLNyK=OkV|?w1I6p6k9T@uk9vn}Dlov*5ib
zH|m(gTLkYv8J~gnFNQd|Nd7^ym|i2C?-g`eu=p|I>}Qj36%^5qeMy3>I>2?Y9RB#G
zlcYu30bc~m_&KkIgISq2NIO)pkzv4FYS=jdxNei<>_=SiPr2ad0mt^Ar}r9HeVAql
ze~fV6JUm0VeTj$iUz0fI94R1ApvFZY14m<l(7<qKAvz)gi4&Rpi7^ov$rJ;0(q2(8
za#nE^Puc?36LkuJ=gf2z4Nz`8U4UE`58*|?9HpbL(F90xNdyXT950|PDR?G<h=5eX
ztP#!Sq9-gS=fQe(EER_~WI@3)T^NIdKlt;xQILN>N0wzImd^|PJHVI;t47(QSTU7O
z1fqpPE)^@n8Z0o1X0WeSoFc3;kC|zxzb0G$ZNCWatI0Jdus^j}&_B7-!z0O<33mYU
z(3yl8D~=fHY$+R!#d79xW~4I7Ea#=8Ngh_gmF7eM#tysybQA=woJB=|PF@Z4H7uST
zWy8!QoS5aEN<EaTg$K(1s-fV<43ykhL&=2~u$)^%p^^dWg;d;tpv=bY{jJ0+`(4Sa
z>}Msf+7Ffd8vdvhkOr##QYlpJpGv+o0#L4ip~x<&lyUg0k|_;T`prnBEvH2bCSF)U
zCyu8~t10D5+{7@B;<-Y;P)sJl?S=uDTa3u?-hGB4$~RsNerfZ9ue#VE!$@R}k#sf|
zO&f_qHkUV|#R-AirL<Wv6M+pq{k>qhhLBO7IG}5V+zDYFxF?EZV<$kxR+2A*HLs9p
zX=7RPq?t}uy)R`9omX~98Ly0+GA^AhXB-}>GOtFwl$lboI#$X8)p1hhN#&{-Df1oi
zQD#U*M{HOZo8tMR<rZnS>;k&?RAN{7$Hv4#s*0zwX~{-i!`Qwjv}4d1+_?>+$6-LY
zM4+&4S=q&L%CpW2@QcCxp~$X*P{i1^ef!?Qea61fKx7baX6*ugB9$?Uc@tYueUr;>
z!*Q)kRwQE(G%@h0<?rdb7X)}dYm7!S3Ah!8XCN_o3_`cS414@q<4^%f|7|<GGqhuP
ztK-xEUW&MXz#nWAWa9gze9x5EA;gxloJWcWM|fhy`g|{u?<?xSvnO7EF}4CSc*?~3
zeD7kGfHNeBXUW)3q?qtv3=^Ku;hhH7=X(zma=l;vI~)~D^|1VHppSPXxXkw?9wY@F
z_?WnUXZ!I;?21jF?|qylefA&M&+V5+;V}q5Sjv^!#rH%mkite1WPPrm>0Q7_9=D(G
zrTm-}_+AQ`sT#k%z(8B9&-ZL5sDgV)k?ZI7v;U8f{s77Gy_*+EpXE>=^G^K{z;Imo
z%$wKgbELl#c=p8ljNxS#d;FH~<606C79>h$OXx=+XV>Tb)gc@dND*Y(lCyq1pFt5g
zK$eO3F??^jj~z>V{HudKorZ#4pZ8O<RFSj)&i4P2L!b9uUVJej@!xr@k~P+xNBSxF
z+3VNabcBUY`c_Fo^5Ja%X8^^&#c_P_`2*kocKVCUOdkhbzfGUdBf?Za%YleYTxR-Z
zK+*SHKkrldKDhJVH|udf&VerWpY{2k?ZpNFA?fm2ey~0x7a(KTFYP~ZQiJ3iFO;>X
zuR+1C&*w(<q;HqSytDonucNP{k}Wy)zYjdbxc_{v)K2v~+v_aPf-Yj+@Uth@k2@5&
z$n*_R!taMOJxmuUCL9c>KO4bNEqyNq{`K_Ny6B(0LH)3c{!edEAN^CS{b_tDAl>F*
zI{SYD^x=P1`Xv1oa+ibSlzrGm|7Ftea40y6;u9|VE%;%F^t?q%PpAIpT=XxI{-{I2
zQ51hg`p)=^v{_Zom1?#!tk3i*(szi758_|BApOuHr6=n#@KqQ6wKQ>mngxkE_5TR~
z$0tHcewFlJpbtw2pqt>w?=OB!G5tH}^Sf&y*7LFVTXfD*p`$2fUG&eaw)C&jcaUv6
G_5T+oAnot~

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfscript
new file mode 100644
index 000000000000..91a69e2c9dd0
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/recursion-compression-pseudoprobe.perfscript
@@ -0,0 +1,23 @@
+PERF_RECORD_MMAP2 3367317/3367317: [0x201000(0x1000) @ 0 00:1d 238458915 1121070]: r-xp recursion-compression-pseudoprobe.perfbin
+
+	          2017db
+	          2017ba
+	          2017e5
+	          2017ba
+	          2017e5
+	          2017d9
+	          2017ba
+	          2017b0
+	          2017b0
+	          2017b0
+	          2017b0
+	          2017b0
+	          2017b0
+	          2017b0
+	          2017b0
+	          2017e5
+	          2017d9
+	          201847
+	    7fcb072a67c3
+	5541f689495641d7
+ 0x2017cd/0x2017db/P/-/-/0  0x2017b5/0x2017c0/P/-/-/0  0x2017a7/0x2017b2/P/-/-/0  0x2017e0/0x2017a0/P/-/-/0  0x2017cd/0x2017db/P/-/-/0  0x2017b5/0x2017c0/P/-/-/0  0x2017a7/0x2017b2/P/-/-/0  0x2017e0/0x2017a0/P/-/-/0  0x2017cd/0x2017db/P/-/-/0  0x2017d4/0x2017c0/P/-/-/0  0x2017b5/0x2017c0/P/-/-/0  0x2017a7/0x2017b2/P/-/-/0  0x2017ab/0x2017a0/P/-/-/0  0x2017ab/0x2017a0/P/-/-/0  0x2017ab/0x2017a0/P/-/-/0  0x2017ab/0x2017a0/P/-/-/0
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
new file mode 100644
index 000000000000..47e0a51a4261
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
@@ -0,0 +1,65 @@
+; Firstly test uncompression(--compress-recursion=0)
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0
+; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t
+; RUN: FileCheck %s --input-file %t
+
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0
+; CHECK-UNCOMPRESS: 1: 1
+; CHECK-UNCOMPRESS: 2: 13 fb:11
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0
+; CHECK-UNCOMPRESS: 1: 11
+; CHECK-UNCOMPRESS: 2: 1 fa:1
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0
+; CHECK-UNCOMPRESS: 1: 1
+; CHECK-UNCOMPRESS: 2: 2 fb:1
+; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0
+; CHECK-UNCOMPRESS: 2: 1
+; CHECK-UNCOMPRESS: 3: 2 fa:1
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0
+; CHECK-UNCOMPRESS: 4: 1
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0
+; CHECK-UNCOMPRESS: 2: 1 fa:1
+
+; CHECK: [main:1 @ foo:3 @ fa]:14:0
+; CHECK:  1: 1
+; CHECK:  2: 13 fb:11
+; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0
+; CHECK:  1: 11
+; CHECK:  2: 1 fa:1
+; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:4:0
+; CHECK:  1: 1
+; CHECK:  2: 2 fb:1
+; CHECK:  4: 1
+; CHECK: [main:1 @ foo]:3:0
+; CHECK:  2: 1
+; CHECK:  3: 2 fa:1
+; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:0:0
+
+
+; original code:
+; clang -O3 -g test.c -o a.out
+#include <stdio.h>
+
+int fb(int n) {
+  if(n > 10) return fb(n / 2);
+  return fa(n - 1);
+}
+
+int fa(int n) {
+  if(n < 2) return n;
+  if(n % 2) return fb(n - 1);
+  return fa(n - 1);
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 10000)
+    s += fa(i);
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
new file mode 100644
index 000000000000..86afe6c632bd
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
@@ -0,0 +1,169 @@
+; Firstly test uncompression(--compress-recursion=0)
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0
+; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: FileCheck %s --input-file %t
+
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  4: 1
+; CHECK-UNCOMPRESS:  7: 1 fb:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 120515930909
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  4: 1
+; CHECK-UNCOMPRESS:  7: 1 fb:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 120515930909
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  5: 1
+; CHECK-UNCOMPRESS:  8: 1 fa:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 120515930909
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  6: 1 fa:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  6: 1 fa:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  6: 1 fa:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  2: 1
+; CHECK-UNCOMPRESS:  5: 1 fb:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  2: 1
+; CHECK-UNCOMPRESS:  5: 1 fb:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  2: 1
+; CHECK-UNCOMPRESS:  5: 1 fb:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 120515930909
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:1:0
+; CHECK-UNCOMPRESS:  5: 1 fb:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+
+
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
+; CHECK:  1: 4
+; CHECK:  2: 3
+; CHECK:  3: 1
+; CHECK:  5: 4 fb:4
+; CHECK:  6: 1 fa:1
+; CHECK:  !CFGChecksum: 72617220756
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2
+; CHECK:  1: 2
+; CHECK:  3: 2
+; CHECK:  4: 1
+; CHECK:  7: 1 fb:1
+; CHECK:  !CFGChecksum: 120515930909
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
+; CHECK:  1: 1
+; CHECK:  3: 1
+; CHECK:  4: 1
+; CHECK:  7: 1 fb:1
+; CHECK:  !CFGChecksum: 120515930909
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa]:4:1
+; CHECK:  1: 1
+; CHECK:  3: 1
+; CHECK:  5: 1
+; CHECK:  8: 1 fa:1
+; CHECK:  !CFGChecksum: 120515930909
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
+; CHECK:  1: 1
+; CHECK:  3: 1
+; CHECK:  6: 1 fa:1
+; CHECK:  !CFGChecksum: 72617220756
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
+; CHECK:  1: 1
+; CHECK:  3: 1
+; CHECK:  6: 1 fa:1
+; CHECK:  !CFGChecksum: 72617220756
+
+
+; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Range Counter:
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5
+; CHECK-UNWINDER:   (7a0, 7a7): 1
+; CHECK-UNWINDER:   (7a0, 7ab): 3
+; CHECK-UNWINDER:   (7b2, 7b5): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6
+; CHECK-UNWINDER:   (7c0, 7d4): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8
+; CHECK-UNWINDER:   (7c0, 7cd): 1
+; CHECK-UNWINDER:   (7db, 7e0): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7
+; CHECK-UNWINDER:   (7a0, 7a7): 1
+; CHECK-UNWINDER:   (7b2, 7b5): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6
+; CHECK-UNWINDER:   (7c0, 7cd): 2
+; CHECK-UNWINDER:   (7db, 7e0): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7
+; CHECK-UNWINDER:   (7a0, 7a7): 1
+; CHECK-UNWINDER:   (7b2, 7b5): 1
+
+; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Branch Counter:
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5
+; CHECK-UNWINDER:   (7a7, 7b2): 1
+; CHECK-UNWINDER:   (7ab, 7a0): 4
+; CHECK-UNWINDER:   (7b5, 7c0): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6
+; CHECK-UNWINDER:   (7d4, 7c0): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8
+; CHECK-UNWINDER:   (7cd, 7db): 1
+; CHECK-UNWINDER:   (7e0, 7a0): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7
+; CHECK-UNWINDER:   (7a7, 7b2): 1
+; CHECK-UNWINDER:   (7b5, 7c0): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6
+; CHECK-UNWINDER:   (7cd, 7db): 2
+; CHECK-UNWINDER:   (7e0, 7a0): 1
+; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7
+; CHECK-UNWINDER:   (7a7, 7b2): 1
+; CHECK-UNWINDER:   (7b5, 7c0): 1
+
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -g test.c  -o a.out
+
+#include <stdio.h>
+
+int fb(int n) {
+  if(n > 10) return fb(n / 2);
+  return fa(n - 1);
+}
+
+int fa(int n) {
+  if(n < 2) return n;
+  if(n % 2) return fb(n - 1);
+  return fa(n - 1);
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 10000)
+    s += fa(i);
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 64a502be59a9..d05c665f8583 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "PerfReader.h"
+#include "ProfileGenerator.h"
 
 static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
                                     cl::init(false), cl::ZeroOrMore,
@@ -124,6 +125,8 @@ VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary,
       ProbeBasedKey->Probes.emplace_back(CallProbe);
     }
   }
+  CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
+      ProbeBasedKey->Probes);
   ProbeBasedKey->genHashCode();
   Hashable<ContextKey> ContextId(ProbeBasedKey);
   auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter());
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index ce228a781538..f769bd592f87 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -22,12 +22,22 @@ static cl::opt<SampleProfileFormat> OutputFormat(
         clEnumValN(SPF_GCC, "gcc",
                    "GCC encoding (only meaningful for -sample)")));
 
+static cl::opt<int32_t, true> RecursionCompression(
+    "compress-recursion",
+    cl::desc("Compressing recursion by deduplicating adjacent frame "
+             "sequences up to the specified size. -1 means no size limit."),
+    cl::Hidden,
+    cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
+
 using namespace llvm;
 using namespace sampleprof;
 
 namespace llvm {
 namespace sampleprof {
 
+// Initialize the MaxCompressionSize to -1 which means no size limit
+int32_t CSProfileGenerator::MaxCompressionSize = -1;
+
 static bool
 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
   return BinarySampleCounters.size() &&
@@ -319,26 +329,16 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
   }
 }
 
-// Helper function to extract context prefix
-// PrefixContextId is the context id string except for the leaf probe's
-// context, the final ContextId will be:
-// ContextId =  PrefixContextId + LeafContextId;
-// Remind that the string in ContextStrStack is in callee-caller order
-// So process the string vector reversely
-static std::string
-extractPrefixContextId(const SmallVector<const PseudoProbe *, 16> &Probes,
-                       ProfiledBinary *Binary) {
-  SmallVector<std::string, 16> ContextStrStack;
+// Helper function to extract context prefix string stack
+// Extract the context stack for reuse; the leaf context stack will
+// be appended and compressed while looking up the function profile
+static void
+extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
+                          const SmallVectorImpl<const PseudoProbe *> &Probes,
+                          ProfiledBinary *Binary) {
   for (const auto *P : Probes) {
     Binary->getInlineContextForProbe(P, ContextStrStack, true);
   }
-  std::ostringstream OContextStr;
-  for (auto &CxtStr : ContextStrStack) {
-    if (OContextStr.str().size())
-      OContextStr << " @ ";
-    OContextStr << CxtStr;
-  }
-  return OContextStr.str();
 }
 
 void PseudoProbeCSProfileGenerator::generateProfile() {
@@ -350,15 +350,15 @@ void PseudoProbeCSProfileGenerator::generateProfile() {
     for (const auto &CI : BI.second) {
       const ProbeBasedCtxKey *CtxKey =
           dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
-      std::string PrefixContextId =
-          extractPrefixContextId(CtxKey->Probes, Binary);
+      SmallVector<std::string, 16> ContextStrStack;
+      extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
       // Fill in function body samples from probes, also infer caller's samples
       // from callee's probe
-      populateBodySamplesWithProbes(CI.second.RangeCounter, PrefixContextId,
+      populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
                                     Binary);
       // Fill in boundary samples for a call probe
       populateBoundarySamplesWithProbes(CI.second.BranchCounter,
-                                        PrefixContextId, Binary);
+                                        ContextStrStack, Binary);
     }
   }
 }
@@ -403,8 +403,8 @@ void PseudoProbeCSProfileGenerator::extractProbesFromRange(
 }
 
 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
-    const RangeSample &RangeCounter, StringRef PrefixContextId,
-    ProfiledBinary *Binary) {
+    const RangeSample &RangeCounter,
+    SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
   ProbeCounterMap ProbeCounter;
   // Extract the top frame probes by looking up each address among the range in
   // the Address2ProbeMap
@@ -413,7 +413,7 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
     const PseudoProbe *Probe = PI.first;
     uint64_t Count = PI.second;
     FunctionSamples &FunctionProfile =
-        getFunctionProfileForLeafProbe(PrefixContextId, Probe, Binary);
+        getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
 
     FunctionProfile.addBodySamples(Probe->Index, 0, Count);
     FunctionProfile.addTotalSamples(Count);
@@ -446,8 +446,8 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
 }
 
 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
-    const BranchSample &BranchCounter, StringRef PrefixContextId,
-    ProfiledBinary *Binary) {
+    const BranchSample &BranchCounter,
+    SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
   for (auto BI : BranchCounter) {
     uint64_t SourceOffset = BI.first.first;
     uint64_t TargetOffset = BI.first.second;
@@ -457,7 +457,7 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
     if (CallProbe == nullptr)
       continue;
     FunctionSamples &FunctionProfile =
-        getFunctionProfileForLeafProbe(PrefixContextId, CallProbe, Binary);
+        getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
     FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
     FunctionProfile.addTotalSamples(Count);
     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
@@ -470,25 +470,26 @@ void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
 }
 
 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
-    StringRef PrefixContextId, SmallVector<std::string, 16> &LeafInlinedContext,
+    SmallVectorImpl<std::string> &ContextStrStack,
     const PseudoProbeFuncDesc *LeafFuncDesc) {
-  assert(LeafInlinedContext.size() &&
-         "Profile context must have the leaf frame");
-  std::ostringstream OContextStr;
-  OContextStr << PrefixContextId.str();
+  assert(ContextStrStack.size() && "Profile context must have the leaf frame");
+  // Compress the context string except for the leaf frame
+  std::string LeafFrame = ContextStrStack.back();
+  ContextStrStack.pop_back();
+  CSProfileGenerator::compressRecursionContext(ContextStrStack);
 
-  for (uint32_t I = 0; I < LeafInlinedContext.size() - 1; I++) {
+  std::ostringstream OContextStr;
+  for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
     if (OContextStr.str().size())
       OContextStr << " @ ";
-    OContextStr << LeafInlinedContext[I];
+    OContextStr << ContextStrStack[I];
   }
   // For leaf inlined context with the top frame, we should strip off the top
   // frame's probe id, like:
   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
   if (OContextStr.str().size())
     OContextStr << " @ ";
-  StringRef LeafLoc = LeafInlinedContext.back();
-  OContextStr << LeafLoc.split(":").first.str();
+  OContextStr << StringRef(LeafFrame).split(":").first.str();
 
   FunctionSamples &FunctionProile =
       getFunctionProfileForContext(OContextStr.str());
@@ -497,17 +498,18 @@ FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
 }
 
 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
-    StringRef PrefixContextId, const PseudoProbe *LeafProbe,
+    SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
     ProfiledBinary *Binary) {
-  SmallVector<std::string, 16> LeafInlinedContext;
-  Binary->getInlineContextForProbe(LeafProbe, LeafInlinedContext);
+  // Explicitly copy the context for appending the leaf context
+  SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
+                                                   ContextStrStack.end());
+  Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy);
   // Note that the context from probe doesn't include leaf frame,
   // hence we need to retrieve and append the leaf frame.
   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
-  LeafInlinedContext.emplace_back(FuncDesc->FuncName + ":" +
-                                  Twine(LeafProbe->Index).str());
-  return getFunctionProfileForLeafProbe(PrefixContextId, LeafInlinedContext,
-                                        FuncDesc);
+  ContextStrStackCopy.emplace_back(FuncDesc->FuncName + ":" +
+                                   Twine(LeafProbe->Index).str());
+  return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc);
 }
 
 } // end namespace sampleprof
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 29f528026a0c..14e58fc9c895 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -50,6 +50,7 @@ class ProfileGenerator {
   */
   void findDisjointRanges(RangeSample &DisjointRanges,
                           const RangeSample &Ranges);
+
   // Used by SampleProfileWriter
   StringMap<FunctionSamples> ProfileMap;
 };
@@ -91,6 +92,111 @@ class CSProfileGenerator : public ProfileGenerator {
     populateInferredFunctionSamples();
   }
 
+  // Remove adjacent repeated context sequences up to a given sequence length,
+  // -1 means no size limit. Note that repeated sequences are identified based
+  // on the exact call site, this is finer granularity than function recursion.
+  template <typename T>
+  static void compressRecursionContext(SmallVectorImpl<T> &Context,
+                                       int32_t CSize = MaxCompressionSize) {
+    uint32_t I = 1;
+    uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
+    uint32_t MaxDedupSize =
+        CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
+    auto BeginIter = Context.begin();
+    // Use an in-place algorithm to save memory copy
+    // End indicates the end location of current iteration's data
+    uint32_t End = 0;
+    // Deduplicate from length 1 to the max possible size of a repeated
+    // sequence.
+    while (I <= MaxDedupSize) {
+      // This is a linear algorithm that deduplicates adjacent repeated
+      // sequences of size I. The deduplication detection runs on a sliding
+      // window whose size is 2*I and it keeps sliding the window to deduplicate
+      // the data inside. Once duplication is detected, deduplicate it by
+      // skipping the right half part of the window, otherwise just copy back
+      // the new one by appending them at the back of End pointer(for the next
+      // iteration).
+      //
+      // For example:
+      // Input: [a1, a2, b1, b2]
+      // (Added index to distinguish the same char, the origin is [a, a, b,
+      // b], the size of the dedup window is 2(I = 1) at the beginning)
+      //
+      // 1) The initial status is a dummy window[null, a1], then just copy the
+      // right half of the window(End = 0), then slide the window.
+      // Result: [a1], a2, b1, b2 (End points to the element right before ],
+      // after ] is the data of the previous iteration)
+      //
+      // 2) Next window is [a1, a2]. Since a1 == a2, skip the right half of
+      // the window, i.e. a duplication is found. Only slide the window.
+      // Result: [a1], a2, b1, b2
+      //
+      // 3) Next window is [a2, b1], copy the right half of the window(b1 is
+      // new) to the End and slide the window.
+      // Result: [a1, b1], b1, b2
+      //
+      // 4) Next window is [b1, b2], same to 2), skip b2.
+      // Result: [a1, b1], b1, b2
+      // After resize, it will be [a, b]
+
+      // Use pointers like below to do comparison inside the window
+      //    [a         b         c        a       b        c]
+      //     |         |         |                |        |
+      // LeftBoundary Left     Right           Left+I    Right+I
+      // A duplication is found if Left < LeftBoundary.
+
+      int32_t Right = I - 1;
+      End = I;
+      int32_t LeftBoundary = 0;
+      while (Right + I < Context.size()) {
+        // To avoid scanning a part of a sequence repeatedly, it finds out
+        // the common suffix of two halves in the window. The common suffix will
+        // serve as the common prefix of next possible pair of duplicate
+        // sequences. The non-common part will be ignored and never scanned
+        // again.
+
+        // For example.
+        // Input: [a, b1], c1, b2, c2
+        // I = 2
+        //
+        // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
+        // part is 'c1', copy it and only slide the window 1 step.
+        // Result: [a, b1, c1], b2, c2
+        //
+        // 2) Next window is [b1, c1, b2, c2], so a duplication is found.
+        // Result after resize: [a, b, c]
+
+        int32_t Left = Right;
+        while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
+          // Find the longest suffix inside the window. When stops, Left points
+          // at the diverging point in the current sequence.
+          Left--;
+        }
+
+        bool DuplicationFound = (Left < LeftBoundary);
+        // Don't need to recheck the data before Right
+        LeftBoundary = Right + 1;
+        if (DuplicationFound) {
+          // Duplication found, skip right half of the window.
+          Right += I;
+        } else {
+          // Copy the non-common-suffix part of the adjacent sequence.
+          std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
+                    BeginIter + End);
+          End += Left + I - Right;
+          // Only slide the window by the size of non-common-suffix
+          Right = Left + I;
+        }
+      }
+      // Don't forget the remaining part that's not scanned.
+      std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
+      End += Context.size() - Right - 1;
+      I++;
+      Context.resize(End);
+      MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
+    }
+  }
+
 protected:
   // Lookup or create FunctionSamples for the context
   FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
@@ -109,6 +215,11 @@ class CSProfileGenerator : public ProfileGenerator {
                                        const BranchSample &BranchCounters,
                                        ProfiledBinary *Binary);
   void populateInferredFunctionSamples();
+
+public:
+  // Deduplicate adjacent repeated context sequences up to a given sequence
+  // length. -1 means no size limit.
+  static int32_t MaxCompressionSize;
 };
 
 using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>;
@@ -127,22 +238,23 @@ class PseudoProbeCSProfileGenerator : public CSProfileGenerator {
                               ProbeCounterMap &ProbeCounter,
                               ProfiledBinary *Binary);
   // Fill in function body samples from probes
-  void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
-                                     StringRef PrefixContextId,
-                                     ProfiledBinary *Binary);
+  void
+  populateBodySamplesWithProbes(const RangeSample &RangeCounter,
+                                SmallVectorImpl<std::string> &ContextStrStack,
+                                ProfiledBinary *Binary);
   // Fill in boundary samples for a call probe
-  void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
-                                         StringRef PrefixContextId,
-                                         ProfiledBinary *Binary);
+  void populateBoundarySamplesWithProbes(
+      const BranchSample &BranchCounter,
+      SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary);
   // Helper function to get FunctionSamples for the leaf inlined context
-  FunctionSamples &getFunctionProfileForLeafProbe(
-      StringRef PrefixContextId,
-      SmallVector<std::string, 16> &LeafInlinedContext,
-      const PseudoProbeFuncDesc *LeafFuncDesc);
+  FunctionSamples &
+  getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
+                                 const PseudoProbeFuncDesc *LeafFuncDesc);
   // Helper function to get FunctionSamples for the leaf probe
-  FunctionSamples &getFunctionProfileForLeafProbe(StringRef PrefixContextId,
-                                                  const PseudoProbe *LeafProbe,
-                                                  ProfiledBinary *Binary);
+  FunctionSamples &
+  getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> &ContextStrStack,
+                                 const PseudoProbe *LeafProbe,
+                                 ProfiledBinary *Binary);
 };
 
 } // end namespace sampleprof
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 4b31dff8cd02..16ef04aba99e 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -8,6 +8,7 @@
 
 #include "ProfiledBinary.h"
 #include "ErrorHandling.h"
+#include "ProfileGenerator.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Support/CommandLine.h"
@@ -128,7 +129,7 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
 std::string
 ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
   std::string ContextStr;
-  SmallVector<std::string, 8> ContextVec;
+  SmallVector<std::string, 16> ContextVec;
   // Process from frame root to leaf
   for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
     uint64_t Offset = virtualAddrToOffset(*Iter);
@@ -139,21 +140,22 @@ ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
   }
 
   assert(ContextVec.size() && "Context length should be at least 1");
+  // Compress the context string except for the leaf frame
+  std::string LeafFrame = ContextVec.back();
+  ContextVec.pop_back();
+  CSProfileGenerator::compressRecursionContext<std::string>(ContextVec);
 
   std::ostringstream OContextStr;
   for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) {
     if (OContextStr.str().size()) {
       OContextStr << " @ ";
     }
-
-    if (I == ContextVec.size() - 1) {
-      // Only keep the function name for the leaf frame
-      StringRef Ref(ContextVec[I]);
-      OContextStr << Ref.split(":").first.str();
-    } else {
-      OContextStr << ContextVec[I];
-    }
+    OContextStr << ContextVec[I];
   }
+  // Only keep the function name for the leaf frame
+  if (OContextStr.str().size())
+    OContextStr << " @ ";
+  OContextStr << StringRef(LeafFrame).split(":").first.str();
   return OContextStr.str();
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 40aee39677e5..bc28e58deb9d 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -243,7 +243,7 @@ class ProfiledBinary {
   }
   void
   getInlineContextForProbe(const PseudoProbe *Probe,
-                           SmallVector<std::string, 16> &InlineContextStack,
+                           SmallVectorImpl<std::string> &InlineContextStack,
                            bool IncludeLeaf = false) const {
     return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack,
                                                  IncludeLeaf);
diff --git a/llvm/tools/llvm-profgen/PseudoProbe.cpp b/llvm/tools/llvm-profgen/PseudoProbe.cpp
index 700984e2184a..a537d9012e6d 100644
--- a/llvm/tools/llvm-profgen/PseudoProbe.cpp
+++ b/llvm/tools/llvm-profgen/PseudoProbe.cpp
@@ -34,7 +34,7 @@ void PseudoProbeFuncDesc::print(raw_ostream &OS) {
   OS << "Hash: " << FuncHash << "\n";
 }
 
-void PseudoProbe::getInlineContext(SmallVector<std::string, 16> &ContextStack,
+void PseudoProbe::getInlineContext(SmallVectorImpl<std::string> &ContextStack,
                                    const GUIDProbeFunctionMap &GUID2FuncMAP,
                                    bool ShowName) const {
   uint32_t Begin = ContextStack.size();
@@ -320,7 +320,7 @@ PseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const {
 }
 
 void PseudoProbeDecoder::getInlineContextForProbe(
-    const PseudoProbe *Probe, SmallVector<std::string, 16> &InlineContextStack,
+    const PseudoProbe *Probe, SmallVectorImpl<std::string> &InlineContextStack,
     bool IncludeLeaf) const {
   Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap, true);
   if (!IncludeLeaf)
diff --git a/llvm/tools/llvm-profgen/PseudoProbe.h b/llvm/tools/llvm-profgen/PseudoProbe.h
index a6647eb39c7a..207772453c97 100644
--- a/llvm/tools/llvm-profgen/PseudoProbe.h
+++ b/llvm/tools/llvm-profgen/PseudoProbe.h
@@ -138,7 +138,7 @@ struct PseudoProbe {
   // Get the inlined context by traversing current inline tree backwards,
   // each tree node has its InlineSite which is taken as the context.
   // \p ContextStack is populated in root to leaf order
-  void getInlineContext(SmallVector<std::string, 16> &ContextStack,
+  void getInlineContext(SmallVectorImpl<std::string> &ContextStack,
                         const GUIDProbeFunctionMap &GUID2FuncMAP,
                         bool ShowName) const;
   // Helper function to get the string from context stack
@@ -214,7 +214,7 @@ class PseudoProbeDecoder {
   //  IncludeLeaf = false, Output: [main:1, foo:2]
   void
   getInlineContextForProbe(const PseudoProbe *Probe,
-                           SmallVector<std::string, 16> &InlineContextStack,
+                           SmallVectorImpl<std::string> &InlineContextStack,
                            bool IncludeLeaf) const;
 
   const AddressProbesMap &getAddress2ProbesMap() const {
diff --git a/llvm/unittests/tools/CMakeLists.txt b/llvm/unittests/tools/CMakeLists.txt
index e7c7dca68d49..7861da8c0e38 100644
--- a/llvm/unittests/tools/CMakeLists.txt
+++ b/llvm/unittests/tools/CMakeLists.txt
@@ -7,4 +7,4 @@ endif()
 add_subdirectory(
   llvm-exegesis
 )
-
+add_subdirectory(llvm-profgen)
diff --git a/llvm/unittests/tools/llvm-profgen/CMakeLists.txt b/llvm/unittests/tools/llvm-profgen/CMakeLists.txt
new file mode 100644
index 000000000000..5a658cf70846
--- /dev/null
+++ b/llvm/unittests/tools/llvm-profgen/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_llvm_unittest(LLVMProfgenTests
+    ContextCompressionTest.cpp
+  )
+
+target_link_libraries(LLVMProfgenTests PRIVATE LLVMTestingSupport)
+
+add_dependencies(LLVMProfgenTests intrinsics_gen)
diff --git a/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp b/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp
new file mode 100644
index 000000000000..7f0cee8878af
--- /dev/null
+++ b/llvm/unittests/tools/llvm-profgen/ContextCompressionTest.cpp
@@ -0,0 +1,36 @@
+//===-- ContextCompressionTest.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "../tools/llvm-profgen/ProfileGenerator.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace sampleprof;
+
+TEST(TestCompression, TestNoSizeLimit1) {
+  SmallVector<std::string, 16> Context = {"a", "b", "c", "a", "b", "c"};
+  SmallVector<std::string, 16> Expect = {"a", "b", "c"};
+  CSProfileGenerator::compressRecursionContext(Context, -1);
+  EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin()));
+}
+
+TEST(TestCompression, TestNoSizeLimit2) {
+  SmallVector<std::string, 16> Context = {"m", "a", "a", "b", "c", "a",
+                                          "b", "c", "b", "c", "d"};
+  SmallVector<std::string, 16> Expect = {"m", "a", "b", "c", "d"};
+  CSProfileGenerator::compressRecursionContext(Context, -1);
+  EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin()));
+}
+
+TEST(TestCompression, TestMaxDedupSize) {
+  SmallVector<std::string, 16> Context = {"m", "a", "a", "b", "c", "a",
+                                          "b", "c", "b", "c", "d"};
+  SmallVector<std::string, 16> Expect = {"m", "a", "b", "c",
+                                         "a", "b", "c", "d"};
+  CSProfileGenerator::compressRecursionContext(Context, 2);
+  EXPECT_TRUE(std::equal(Context.begin(), Context.end(), Expect.begin()));
+}

From e562ff08f634d814c1cd1e65e3428ca5308d3022 Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Mon, 11 Jan 2021 12:47:22 -0800
Subject: [PATCH 139/244] [CSSPGO][llvm-profgen] Aggregate samples on call
 frame trie to speed up profile generation

For CS profile generation, the process of call stack unwinding is time-consuming, since for each LBR entry we need linear time to generate the context (hash, compression, string concatenation). This change speeds this up by grouping all the call frames within one LBR sample into a trie and aggregating the results (sample counters) on it, deferring the context compression and string generation to the end of unwinding.

Specifically, it uses `StackLeaf` as the top frame on the stack and manipulates it dynamically (popping or pushing a trie node) during virtual unwinding, so that the raw sample can just be recorded on the leaf node; the path (root to leaf) represents its calling context. At the end, it traverses the trie and generates the context on the fly.

Results:
Our internal branch shows about a 5X speed-up on some large workloads in the SPEC06 benchmark.

Differential Revision: https://reviews.llvm.org/D94110
---
 llvm/tools/llvm-profgen/PerfReader.cpp     | 162 +++++++++++---------
 llvm/tools/llvm-profgen/PerfReader.h       | 167 ++++++++++++++++-----
 llvm/tools/llvm-profgen/ProfiledBinary.cpp |   8 +-
 llvm/tools/llvm-profgen/ProfiledBinary.h   |   3 +-
 4 files changed, 232 insertions(+), 108 deletions(-)

diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index d05c665f8583..787bde28400f 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -28,11 +28,12 @@ void VirtualUnwinder::unwindCall(UnwindState &State) {
   // 2nd frame is in prolog/epilog. In the future, we will switch to
   // pro/epi tracker(Dwarf CFI) for the precise check.
   uint64_t Source = State.getCurrentLBRSource();
-  auto Iter = State.CallStack.begin();
-  if (State.CallStack.size() == 1 || *(++Iter) != Source) {
-    State.CallStack.front() = Source;
+  auto *ParentFrame = State.getParentFrame();
+  if (ParentFrame == State.getDummyRootPtr() ||
+      ParentFrame->Address != Source) {
+    State.switchToFrame(Source);
   } else {
-    State.CallStack.pop_front();
+    State.popFrame();
   }
   State.InstPtr.update(Source);
 }
@@ -41,26 +42,29 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
   InstructionPointer &IP = State.InstPtr;
   uint64_t Target = State.getCurrentLBRTarget();
   uint64_t End = IP.Address;
-  if (State.getBinary()->usePseudoProbes()) {
+  if (Binary->usePseudoProbes()) {
+    // We don't need to top frame probe since it should be extracted
+    // from the range.
     // The outcome of the virtual unwinding with pseudo probes is a
     // map from a context key to the address range being unwound.
     // This means basically linear unwinding is not needed for pseudo
     // probes. The range will be simply recorded here and will be
     // converted to a list of pseudo probes to report in ProfileGenerator.
-    recordRangeCount(Target, End, State, Repeat);
+    State.getParentFrame()->recordRangeCount(Target, End, Repeat);
   } else {
     // Unwind linear execution part
+    uint64_t LeafAddr = State.CurrentLeafFrame->Address;
     while (IP.Address >= Target) {
       uint64_t PrevIP = IP.Address;
       IP.backward();
       // Break into segments for implicit call/return due to inlining
-      bool SameInlinee =
-          State.getBinary()->inlineContextEqual(PrevIP, IP.Address);
+      bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address);
       if (!SameInlinee || PrevIP == Target) {
-        recordRangeCount(PrevIP, End, State, Repeat);
+        State.switchToFrame(LeafAddr);
+        State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat);
         End = IP.Address;
       }
-      State.CallStack.front() = IP.Address;
+      LeafAddr = IP.Address;
     }
   }
 }
@@ -68,9 +72,9 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
 void VirtualUnwinder::unwindReturn(UnwindState &State) {
   // Add extra frame as we unwind through the return
   const LBREntry &LBR = State.getCurrentLBR();
-  uint64_t CallAddr = State.getBinary()->getCallAddrFromFrameAddr(LBR.Target);
-  State.CallStack.front() = CallAddr;
-  State.CallStack.push_front(LBR.Source);
+  uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target);
+  State.switchToFrame(CallAddr);
+  State.pushFrame(LBR.Source);
   State.InstPtr.update(LBR.Source);
 }
 
@@ -78,79 +82,100 @@ void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) {
   // TODO: Tolerate tail call for now, as we may see tail call from libraries.
   // This is only for intra function branches, excluding tail calls.
   uint64_t Source = State.getCurrentLBRSource();
-  State.CallStack.front() = Source;
+  State.switchToFrame(Source);
   State.InstPtr.update(Source);
 }
 
-SampleCounter &
-VirtualUnwinder::getOrCreateCounter(const ProfiledBinary *Binary,
-                                    std::list<uint64_t> &CallStack) {
-  if (Binary->usePseudoProbes()) {
-    return getOrCreateCounterForProbe(Binary, CallStack);
-  }
+std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
   std::shared_ptr<StringBasedCtxKey> KeyStr =
       std::make_shared<StringBasedCtxKey>();
-  KeyStr->Context = Binary->getExpandedContextStr(CallStack);
+  KeyStr->Context = Binary->getExpandedContextStr(Stack);
   KeyStr->genHashCode();
-  auto Ret =
-      CtxCounterMap->emplace(Hashable<ContextKey>(KeyStr), SampleCounter());
-  return Ret.first->second;
+  return KeyStr;
 }
 
-SampleCounter &
-VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary,
-                                            std::list<uint64_t> &CallStack) {
+std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
   std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
       std::make_shared<ProbeBasedCtxKey>();
-  if (CallStack.size() > 1) {
-    // We don't need to top frame probe since it should be extracted
-    // from the range.
-    // The top of stack is an instruction from the function where
-    // the LBR address range physcially resides. Strip it since
-    // the function is not a part of the call context. We also
-    // don't need its inline context since the probes being unwound
-    // come with an inline context all the way back to the uninlined
-    // function in their prefix tree.
-    auto Iter = CallStack.rbegin();
-    auto EndT = std::prev(CallStack.rend());
-    for (; Iter != EndT; Iter++) {
-      uint64_t Address = *Iter;
-      const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Address);
-      // We may not find a probe for a merged or external callsite.
-      // Callsite merging may cause the loss of original probe IDs.
-      // Cutting off the context from here since the inline will
-      // not know how to consume a context with unknown callsites.
-      if (!CallProbe)
-        break;
-      ProbeBasedKey->Probes.emplace_back(CallProbe);
-    }
+  for (auto CallProbe : Stack) {
+    ProbeBasedKey->Probes.emplace_back(CallProbe);
   }
   CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
       ProbeBasedKey->Probes);
   ProbeBasedKey->genHashCode();
-  Hashable<ContextKey> ContextId(ProbeBasedKey);
-  auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter());
-  return Ret.first->second;
+  return ProbeBasedKey;
+}
+
+template <typename T>
+void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
+                                              T &Stack) {
+  if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
+    return;
+
+  std::shared_ptr<ContextKey> Key = Stack.getContextKey();
+  auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
+  SampleCounter &SCounter = Ret.first->second;
+  for (auto &Item : Cur->RangeSamples) {
+    uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
+    uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
+    SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item));
+  }
+
+  for (auto &Item : Cur->BranchSamples) {
+    uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
+    uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
+    SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item));
+  }
+}
+
+template <typename T>
+void VirtualUnwinder::collectSamplesFromFrameTrie(
+    UnwindState::ProfiledFrame *Cur, T &Stack) {
+  if (!Cur->isDummyRoot()) {
+    if (!Stack.pushFrame(Cur)) {
+      // Process truncated context
+      for (const auto &Item : Cur->Children) {
+        // Start a new traversal ignoring its bottom context
+        collectSamplesFromFrameTrie(Item.second.get());
+      }
+      return;
+    }
+  }
+
+  collectSamplesFromFrame(Cur, Stack);
+  // Process children frame
+  for (const auto &Item : Cur->Children) {
+    collectSamplesFromFrameTrie(Item.second.get(), Stack);
+  }
+  // Recover the call stack
+  Stack.popFrame();
 }
 
-void VirtualUnwinder::recordRangeCount(uint64_t Start, uint64_t End,
-                                       UnwindState &State, uint64_t Repeat) {
-  uint64_t StartOffset = State.getBinary()->virtualAddrToOffset(Start);
-  uint64_t EndOffset = State.getBinary()->virtualAddrToOffset(End);
-  SampleCounter &SCounter =
-      getOrCreateCounter(State.getBinary(), State.CallStack);
-  SCounter.recordRangeCount(StartOffset, EndOffset, Repeat);
+void VirtualUnwinder::collectSamplesFromFrameTrie(
+    UnwindState::ProfiledFrame *Cur) {
+  if (Binary->usePseudoProbes()) {
+    ProbeStack Stack(Binary);
+    collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
+  } else {
+    FrameStack Stack(Binary);
+    collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
+  }
 }
 
 void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
                                         UnwindState &State, uint64_t Repeat) {
   if (Branch.IsArtificial)
     return;
-  uint64_t SourceOffset = State.getBinary()->virtualAddrToOffset(Branch.Source);
-  uint64_t TargetOffset = State.getBinary()->virtualAddrToOffset(Branch.Target);
-  SampleCounter &SCounter =
-      getOrCreateCounter(State.getBinary(), State.CallStack);
-  SCounter.recordBranchCount(SourceOffset, TargetOffset, Repeat);
+
+  if (Binary->usePseudoProbes()) {
+    // Same as recordRangeCount, We don't need to top frame probe since we will
+    // extract it from branch's source address
+    State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
+                                              Repeat);
+  } else {
+    State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
+                                              Repeat);
+  }
 }
 
 bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
@@ -199,6 +224,8 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
     // Record `branch` with calling context after unwinding.
     recordBranchCount(Branch, State, Repeat);
   }
+  // As samples are aggregated on trie, record them into counter map
+  collectSamplesFromFrameTrie(State.getDummyRootPtr());
 
   return true;
 }
@@ -325,7 +352,8 @@ void PerfReader::printUnwinderOutput() {
 void PerfReader::unwindSamples() {
   for (const auto &Item : AggregatedSamples) {
     const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
-    VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary]);
+    VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
+                             Sample->Binary);
     Unwinder.unwind(Sample, Item.second);
   }
 
@@ -334,7 +362,7 @@ void PerfReader::unwindSamples() {
 }
 
 bool PerfReader::extractLBRStack(TraceStream &TraceIt,
-                                 SmallVector<LBREntry, 16> &LBRStack,
+                                 SmallVectorImpl<LBREntry> &LBRStack,
                                  ProfiledBinary *Binary) {
   // The raw format of LBR stack is like:
   // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
@@ -398,7 +426,7 @@ bool PerfReader::extractLBRStack(TraceStream &TraceIt,
 }
 
 bool PerfReader::extractCallstack(TraceStream &TraceIt,
-                                  std::list<uint64_t> &CallStack) {
+                                  SmallVectorImpl<uint64_t> &CallStack) {
   // The raw format of call stack is like:
   //            4005dc      # leaf frame
   //	          400634
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 66649a060bc3..7eaa4b846259 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -133,7 +133,7 @@ struct HybridSample : public PerfSample {
   // Profiled binary that current frame address belongs to
   ProfiledBinary *Binary;
   // Call stack recorded in FILO(leaf to root) order
-  std::list<uint64_t> CallStack;
+  SmallVector<uint64_t, 16> CallStack;
   // LBR stack recorded in FIFO order
   SmallVector<LBREntry, 16> LBRStack;
 
@@ -147,7 +147,7 @@ struct HybridSample : public PerfSample {
     const HybridSample *Other = dyn_cast<HybridSample>(K);
     if (Other->Binary != Binary)
       return false;
-    const std::list<uint64_t> &OtherCallStack = Other->CallStack;
+    const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
     const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
 
     if (CallStack.size() != OtherCallStack.size() ||
@@ -193,14 +193,40 @@ using AggregatedCounter =
     std::unordered_map<Hashable<PerfSample>, uint64_t,
                        Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
 
+using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
 // The state for the unwinder, it doesn't hold the data but only keep the
 // pointer/index of the data, While unwinding, the CallStack is changed
 // dynamicially and will be recorded as the context of the sample
 struct UnwindState {
   // Profiled binary that current frame address belongs to
   const ProfiledBinary *Binary;
-  // TODO: switch to use trie for call stack
-  std::list<uint64_t> CallStack;
+  // Call stack trie node
+  struct ProfiledFrame {
+    const uint64_t Address = 0;
+    ProfiledFrame *Parent;
+    SampleVector RangeSamples;
+    SampleVector BranchSamples;
+    std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
+
+    ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
+        : Address(Addr), Parent(P) {}
+    ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
+      assert(Address && "Address can't be zero!");
+      auto Ret = Children.emplace(
+          Address, std::make_unique<ProfiledFrame>(Address, this));
+      return Ret.first->second.get();
+    }
+    void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
+      RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
+    }
+    void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
+      BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
+    }
+    bool isDummyRoot() { return Address == 0; }
+  };
+
+  ProfiledFrame DummyTrieRoot;
+  ProfiledFrame *CurrentLeafFrame;
   // Used to fall through the LBR stack
   uint32_t LBRIndex = 0;
   // Reference to HybridSample.LBRStack
@@ -208,19 +234,20 @@ struct UnwindState {
   // Used to iterate the address range
   InstructionPointer InstPtr;
   UnwindState(const HybridSample *Sample)
-      : Binary(Sample->Binary), CallStack(Sample->CallStack),
-        LBRStack(Sample->LBRStack),
-        InstPtr(Sample->Binary, Sample->CallStack.front()) {}
+      : Binary(Sample->Binary), LBRStack(Sample->LBRStack),
+        InstPtr(Sample->Binary, Sample->CallStack.front()) {
+    initFrameTrie(Sample->CallStack);
+  }
 
   bool validateInitialState() {
     uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
-    uint64_t StackLeaf = CallStack.front();
+    uint64_t LeafAddr = CurrentLeafFrame->Address;
     // When we take a stack sample, ideally the sampling distance between the
     // leaf IP of stack and the last LBR target shouldn't be very large.
     // Use a heuristic size (0x100) to filter out broken records.
-    if (StackLeaf < LBRLeaf || StackLeaf >= LBRLeaf + 0x100) {
+    if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
       WithColor::warning() << "Bogus trace: stack tip = "
-                           << format("%#010x", StackLeaf)
+                           << format("%#010x", LeafAddr)
                            << ", LBR tip = " << format("%#010x\n", LBRLeaf);
       return false;
     }
@@ -228,19 +255,40 @@ struct UnwindState {
   }
 
   void checkStateConsistency() {
-    assert(InstPtr.Address == CallStack.front() &&
+    assert(InstPtr.Address == CurrentLeafFrame->Address &&
            "IP should align with context leaf");
   }
 
-  std::string getExpandedContextStr() const {
-    return Binary->getExpandedContextStr(CallStack);
-  }
   const ProfiledBinary *getBinary() const { return Binary; }
   bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
   uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
   uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
   const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
   void advanceLBR() { LBRIndex++; }
+
+  ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
+
+  void pushFrame(uint64_t Address) {
+    CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
+  }
+
+  void switchToFrame(uint64_t Address) {
+    if (CurrentLeafFrame->Address == Address)
+      return;
+    CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
+  }
+
+  void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
+
+  void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
+    ProfiledFrame *Cur = &DummyTrieRoot;
+    for (auto Address : reverse(CallStack)) {
+      Cur = Cur->getOrCreateChildFrame(Address);
+    }
+    CurrentLeafFrame = Cur;
+  }
+
+  ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
 };
 
 // Base class for sample counter key with context
@@ -330,6 +378,56 @@ using ContextSampleCounterMap =
     std::unordered_map<Hashable<ContextKey>, SampleCounter,
                        Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
 
+struct FrameStack {
+  SmallVector<uint64_t, 16> Stack;
+  const ProfiledBinary *Binary;
+  FrameStack(const ProfiledBinary *B) : Binary(B) {}
+  bool pushFrame(UnwindState::ProfiledFrame *Cur) {
+    Stack.push_back(Cur->Address);
+    return true;
+  }
+
+  void popFrame() {
+    if (!Stack.empty())
+      Stack.pop_back();
+  }
+  std::shared_ptr<StringBasedCtxKey> getContextKey();
+};
+
+struct ProbeStack {
+  SmallVector<const PseudoProbe *, 16> Stack;
+  const ProfiledBinary *Binary;
+  ProbeStack(const ProfiledBinary *B) : Binary(B) {}
+  bool pushFrame(UnwindState::ProfiledFrame *Cur) {
+    const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address);
+    // We may not find a probe for a merged or external callsite.
+    // Callsite merging may cause the loss of original probe IDs.
+    // Cutting off the context from here since the inliner will
+    // not know how to consume a context with unknown callsites.
+    if (!CallProbe)
+      return false;
+    Stack.push_back(CallProbe);
+    return true;
+  }
+
+  void popFrame() {
+    if (!Stack.empty())
+      Stack.pop_back();
+  }
+  // Use pseudo probe based context key to get the sample counter
+  // A context stands for a call path from 'main' to an uninlined
+  // callee with all inline frames recovered on that path. The probes
+  // belonging to that call path is the probes either originated from
+  // the callee or from any functions inlined into the callee. Since
+  // pseudo probes are organized in a tri-tree style after decoded,
+  // the tree path from the tri-tree root (which is the uninlined
+  // callee) to the probe node forms an inline context.
+  // Here we use a list of probe(pointer) as the context key to speed up
+  // aggregation and the final context string will be generate in
+  // ProfileGenerator
+  std::shared_ptr<ProbeBasedCtxKey> getContextKey();
+};
+
 /*
 As in hybrid sample we have a group of LBRs and the most recent sampling call
 stack, we can walk through those LBRs to infer more call stacks which would be
@@ -351,47 +449,43 @@ range as sample counter for further CS profile generation.
 */
 class VirtualUnwinder {
 public:
-  VirtualUnwinder(ContextSampleCounterMap *Counter) : CtxCounterMap(Counter) {}
+  VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
+      : CtxCounterMap(Counter), Binary(B) {}
+  bool unwind(const HybridSample *Sample, uint64_t Repeat);
 
+private:
   bool isCallState(UnwindState &State) const {
     // The tail call frame is always missing here in stack sample, we will
     // use a specific tail call tracker to infer it.
-    return State.getBinary()->addressIsCall(State.getCurrentLBRSource());
+    return Binary->addressIsCall(State.getCurrentLBRSource());
   }
 
   bool isReturnState(UnwindState &State) const {
     // Simply check addressIsReturn, as ret is always reliable, both for
     // regular call and tail call.
-    return State.getBinary()->addressIsReturn(State.getCurrentLBRSource());
+    return Binary->addressIsReturn(State.getCurrentLBRSource());
   }
 
   void unwindCall(UnwindState &State);
   void unwindLinear(UnwindState &State, uint64_t Repeat);
   void unwindReturn(UnwindState &State);
   void unwindBranchWithinFrame(UnwindState &State);
-  bool unwind(const HybridSample *Sample, uint64_t Repeat);
+
+  template <typename T>
+  void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
+  // Collect each samples on trie node by DFS traversal
+  template <typename T>
+  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
+  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
+
   void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
                         uint64_t Repeat);
   void recordBranchCount(const LBREntry &Branch, UnwindState &State,
                          uint64_t Repeat);
-  SampleCounter &getOrCreateCounter(const ProfiledBinary *Binary,
-                                    std::list<uint64_t> &CallStack);
-  // Use pseudo probe based context key to get the sample counter
-  // A context stands for a call path from 'main' to an uninlined
-  // callee with all inline frames recovered on that path. The probes
-  // belonging to that call path is the probes either originated from
-  // the callee or from any functions inlined into the callee. Since
-  // pseudo probes are organized in a tri-tree style after decoded,
-  // the tree path from the tri-tree root (which is the uninlined
-  // callee) to the probe node forms an inline context.
-  // Here we use a list of probe(pointer) as the context key to speed up
-  // aggregation and the final context string will be generate in
-  // ProfileGenerator
-  SampleCounter &getOrCreateCounterForProbe(const ProfiledBinary *Binary,
-                                            std::list<uint64_t> &CallStack);
 
-private:
   ContextSampleCounterMap *CtxCounterMap;
+  // Profiled binary that current frame address belongs to
+  const ProfiledBinary *Binary;
 };
 
 // Filename to binary map
@@ -457,10 +551,11 @@ class PerfReader {
   // Parse the hybrid sample including the call and LBR line
   void parseHybridSample(TraceStream &TraceIt);
   // Extract call stack from the perf trace lines
-  bool extractCallstack(TraceStream &TraceIt, std::list<uint64_t> &CallStack);
+  bool extractCallstack(TraceStream &TraceIt,
+                        SmallVectorImpl<uint64_t> &CallStack);
   // Extract LBR stack from one perf trace line
   bool extractLBRStack(TraceStream &TraceIt,
-                       SmallVector<LBREntry, 16> &LBRStack,
+                       SmallVectorImpl<LBREntry> &LBRStack,
                        ProfiledBinary *Binary);
   void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
   // Post process the profile after trace aggregation, we will do simple range
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 16ef04aba99e..2c6cedf57649 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -126,13 +126,13 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
                     Context2.begin(), Context2.begin() + Context2.size() - 1);
 }
 
-std::string
-ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
+std::string ProfiledBinary::getExpandedContextStr(
+    const SmallVectorImpl<uint64_t> &Stack) const {
   std::string ContextStr;
   SmallVector<std::string, 16> ContextVec;
   // Process from frame root to leaf
-  for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
-    uint64_t Offset = virtualAddrToOffset(*Iter);
+  for (auto Address : Stack) {
+    uint64_t Offset = virtualAddrToOffset(Address);
     const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
     for (const auto &Loc : ExpandedContext) {
       ContextVec.push_back(getCallSite(Loc));
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index bc28e58deb9d..f6c7460e186d 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -236,7 +236,8 @@ class ProfiledBinary {
   // Get the context string of the current stack with inline context filled in.
   // It will search the disassembling info stored in Offset2LocStackMap. This is
   // used as the key of function sample map
-  std::string getExpandedContextStr(const std::list<uint64_t> &stack) const;
+  std::string
+  getExpandedContextStr(const SmallVectorImpl<uint64_t> &Stack) const;
 
   const PseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);

From 87c27020cc6466ae33550f1f1f55d5989afaca2e Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Thu, 21 Jan 2021 09:36:32 -0800
Subject: [PATCH 140/244] [CSSPGO][llvm-profgen] Merge and trim profile for
 cold context to reduce profile size

This change allows merging and trimming cold context profiles in llvm-profgen to solve the profile size bloat problem. Currently, when a profile's total sample count is below the threshold (configurable via a switch), it is considered cold and merged into a base context-less profile, which keeps the profile quality at least as good as the baseline (non-CS).

For example, two input profiles:
 [main @ foo @ bar]:60
 [main @ bar]:50
With threshold = 100, the two profiles will be merged into one with the base context, giving the result:
 [bar]:110

Added two switches:
`--csprof-cold-thres=<value>`: Specifies the total-samples threshold for a context profile to be considered cold, with 100 being the default. Any cold context profiles will be merged into the context-less base profile by default.
`--csprof-keep-cold`: Force profile generation to keep cold context profiles instead of dropping them. By default, any cold context will not be written to output profile.

Results:
Though it has not yet been evaluated with the latest CSSPGO, our internal branch shows that it is neutral on performance while significantly reducing the profile size. A detailed evaluation of llvm-profgen with CSSPGO will come later.

Differential Revision: https://reviews.llvm.org/D94111
---
 .../tools/llvm-profgen/inline-cs-noprobe.test |  2 +-
 .../llvm-profgen/inline-cs-pseudoprobe.test   |  2 +-
 .../llvm-profgen/merge-cold-profile.test      | 70 +++++++++++++++++++
 .../llvm-profgen/noinline-cs-noprobe.test     |  2 +-
 .../llvm-profgen/noinline-cs-pseudoprobe.test |  2 +-
 .../recursion-compression-noprobe.test        |  4 +-
 .../recursion-compression-pseudoprobe.test    |  4 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 57 +++++++++++++++
 llvm/tools/llvm-profgen/ProfileGenerator.h    |  8 ++-
 9 files changed, 142 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profgen/merge-cold-profile.test

diff --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
index 98767a9b29b7..943832ebef10 100644
--- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:[main:1 @ foo]:44:0
diff --git a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
index 19928322a66d..c7aa1dea21bb 100644
--- a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:     [main:2 @ foo]:74:0
diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
new file mode 100644
index 000000000000..e0c65ac44e2b
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -0,0 +1,70 @@
+; Uses the data from recursion-compression.test; refer to it for the unmerged output
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=8
+; RUN: FileCheck %s --input-file %t
+
+; Test --csprof-keep-cold
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-keep-cold
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD
+
+; CHECK:     [fa]:14:4
+; CHECK-NEXT: 1: 4
+; CHECK-NEXT: 3: 4
+; CHECK-NEXT: 4: 2
+; CHECK-NEXT: 5: 1
+; CHECK-NEXT: 7: 2 fb:2
+; CHECK-NEXT: 8: 1 fa:1
+; CHECK-NEXT: !CFGChecksum: 120515930909
+; CHECK-NEXT:[main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
+; CHECK-NEXT: 1: 4
+; CHECK-NEXT: 2: 3
+; CHECK-NEXT: 3: 1
+; CHECK-NEXT: 5: 4 fb:4
+; CHECK-NEXT: 6: 1 fa:1
+; CHECK-NEXT: !CFGChecksum: 72617220756
+
+; CHECK-KEEP-COLD:     [fb]:19:6
+; CHECK-KEEP-COLD-NEXT: 1: 6
+; CHECK-KEEP-COLD-NEXT: 2: 3
+; CHECK-KEEP-COLD-NEXT: 3: 3
+; CHECK-KEEP-COLD-NEXT: 5: 4 fb:4
+; CHECK-KEEP-COLD-NEXT: 6: 3 fa:3
+; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 72617220756
+; CHECK-KEEP-COLD-NEXT:[fa]:14:4
+; CHECK-KEEP-COLD-NEXT: 1: 4
+; CHECK-KEEP-COLD-NEXT: 3: 4
+; CHECK-KEEP-COLD-NEXT: 4: 2
+; CHECK-KEEP-COLD-NEXT: 5: 1
+; CHECK-KEEP-COLD-NEXT: 7: 2 fb:2
+; CHECK-KEEP-COLD-NEXT: 8: 1 fa:1
+; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 120515930909
+
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -g test.c  -o a.out
+
+; Copied from recursion-compression.test
+#include <stdio.h>
+
+int fb(int n) {
+  if(n > 10) return fb(n / 2);
+  return fa(n - 1);
+}
+
+int fa(int n) {
+  if(n < 2) return n;
+  if(n % 2) return fb(n - 1);
+  return fa(n - 1);
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 10000)
+    s += fa(i);
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
index 9beecb271fc0..2e60883afa62 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:[main:1 @ foo:3 @ bar]:12:3
diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
index 0491a62ff69b..a0e5507c70dd 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:     [main:2 @ foo]:75:0
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
index 47e0a51a4261..43f495398bb0 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
@@ -1,7 +1,7 @@
 ; Firstly test uncompression(--compress-recursion=0)
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 --csprof-cold-thres=0
 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --csprof-cold-thres=0
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
index 86afe6c632bd..0d4e7dbb1dd4 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
@@ -1,7 +1,7 @@
 ; Firstly test uncompression(--compress-recursion=0)
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --csprof-cold-thres=0
 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index f769bd592f87..b2a8d60d5caf 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -29,6 +29,19 @@ static cl::opt<int32_t, true> RecursionCompression(
     cl::Hidden,
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
 
+static cl::opt<uint64_t> CSProfColdThres(
+    "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
+    cl::desc("Specify the total samples threshold for a context profile to "
+             "be considered cold, any cold profiles will be merged into "
+             "context-less base profiles"));
+
+static cl::opt<bool> CSProfKeepCold(
+    "csprof-keep-cold", cl::init(false), cl::ZeroOrMore,
+    cl::desc("This works together with --csprof-cold-thres. If the total count "
+             "of the profile after all merge is done is still smaller than the "
+             "csprof-cold-thres, it will be trimmed unless csprof-keep-cold "
+             "flag is specified."));
+
 using namespace llvm;
 using namespace sampleprof;
 
@@ -68,6 +81,7 @@ void ProfileGenerator::write() {
   if (std::error_code EC = WriterOrErr.getError())
     exitWithError(EC, OutputFilename);
   auto Writer = std::move(WriterOrErr.get());
+  mergeAndTrimColdProfile(ProfileMap);
   Writer->write(ProfileMap);
 }
 
@@ -329,6 +343,49 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
   }
 }
 
+void CSProfileGenerator::mergeAndTrimColdProfile(
+    StringMap<FunctionSamples> &ProfileMap) {
+  // Nothing to merge if sample threshold is zero
+  if (!CSProfColdThres)
+    return;
+
+  // Collect the cold profiles in ProfileMap (total samples below the
+  // threshold) as key/pointer pairs in a tmp container
+  std::vector<std::pair<StringRef, const FunctionSamples *>> ToRemoveVec;
+  for (const auto &I : ProfileMap) {
+    const FunctionSamples &FunctionProfile = I.second;
+    if (FunctionProfile.getTotalSamples() >= CSProfColdThres)
+      continue;
+    ToRemoveVec.emplace_back(I.getKey(), &I.second);
+  }
+
+  // Remove the cold profiles from ProfileMap and merge them into BaseProfileMap
+  StringMap<FunctionSamples> BaseProfileMap;
+  for (const auto &I : ToRemoveVec) {
+    auto Ret =
+        BaseProfileMap.try_emplace(I.second->getName(), FunctionSamples());
+    FunctionSamples &BaseProfile = Ret.first->second;
+    BaseProfile.merge(*I.second);
+    ProfileMap.erase(I.first);
+  }
+
+  // Merge the base profiles into ProfileMap.
+  for (const auto &I : BaseProfileMap) {
+    // Skip a still-cold base profile, unless kept or ProfileMap has its key
+    if (!CSProfKeepCold && I.second.getTotalSamples() < CSProfColdThres &&
+        ProfileMap.find(I.getKey()) == ProfileMap.end())
+      continue;
+    // Merge the profile if the original profile exists, otherwise just insert
+    // as a new profile
+    FunctionSamples &OrigProfile = getFunctionProfileForContext(I.getKey());
+    StringRef TmpName = OrigProfile.getName();
+    OrigProfile.merge(I.second);
+    // Should use the name ref from ProfileMap's key to avoid name being freed
+    // from BaseProfileMap
+    OrigProfile.setName(TmpName);
+  }
+}
+
 // Helper function to extract context prefix string stack
 // Extract context stack for reusing, leaf context stack will
 // be added compressed while looking up function profile
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 14e58fc9c895..9cb04c4de34d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -28,7 +28,10 @@ class ProfileGenerator {
   create(const BinarySampleCounterMap &BinarySampleCounters,
          enum PerfScriptType SampleType);
   virtual void generateProfile() = 0;
-
+  // Merge and trim profile with cold context before serialization,
+  // only eligible for CS profile
+  virtual void
+  mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap){};
   // Use SampleProfileWriter to serialize profile map
   void write();
 
@@ -200,6 +203,9 @@ class CSProfileGenerator : public ProfileGenerator {
 protected:
   // Lookup or create FunctionSamples for the context
   FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
+  // Merge cold context profile whose total sample is below threshold
+  // into base profile.
+  void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap) override;
 
 private:
   // Helper function for updating body sample for a leaf location in

From db88d92217f185d9ab5b8f0a0eddc5dc9ad30659 Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Wed, 20 Jan 2021 17:54:03 -0800
Subject: [PATCH 141/244] [CSSPGO][llvm-profgen] Fix bug with parsing hybrid
 sample trace line

When we skip the call stack starting with an external address, we should also skip the bottom LBR entry; otherwise it will cause a truncated context issue.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D95480
---
 .../llvm-profgen/Inputs/inline-cs-noprobe.perfscript |  6 ++++++
 llvm/tools/llvm-profgen/PerfReader.cpp               | 12 +++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript
index 7ef76dcd3884..116bd0a2c4c1 100644
--- a/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript
+++ b/llvm/test/tools/llvm-profgen/Inputs/inline-cs-noprobe.perfscript
@@ -1,5 +1,11 @@
 PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]: r-xp /home/inline-cs-noprobe.perfbin
 
+; test for an external or invalid top address, should skip the whole sample
+
+	        ffffffff
+	          40067e
+	5541f689495641d7
+ 0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x40069b/0x400670/M/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0  0x4006c8/0x40067e/P/-/-/0
 
 	          40067e
 	5541f689495641d7
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 787bde28400f..e59d8d93381b 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -437,11 +437,12 @@ bool PerfReader::extractCallstack(TraceStream &TraceIt,
   ProfiledBinary *Binary = nullptr;
   while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
     StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
-    // We might get an empty line at the beginning or comments, skip it
     uint64_t FrameAddr = 0;
     if (FrameStr.getAsInteger(16, FrameAddr)) {
+      // We might parse a non-perf sample line like empty line and comments,
+      // skip it
       TraceIt.advance();
-      break;
+      return false;
     }
     TraceIt.advance();
     if (!Binary) {
@@ -468,9 +469,9 @@ bool PerfReader::extractCallstack(TraceStream &TraceIt,
     CallStack.emplace_back(FrameAddr);
   }
 
-  if (CallStack.empty())
-    return false;
   // Skip other unrelated line, find the next valid LBR line
+  // Note that even for empty call stack, we should skip the address at the
+  // bottom, otherwise the following pass may generate a truncated callstack
   while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
     TraceIt.advance();
   }
@@ -482,7 +483,8 @@ bool PerfReader::extractCallstack(TraceStream &TraceIt,
   // of such case - when sample landed in prolog/epilog, somehow stack
   // walking will be broken in an unexpected way that higher frames will be
   // missing.
-  return !Binary->addressInPrologEpilog(CallStack.front());
+  return !CallStack.empty() &&
+         !Binary->addressInPrologEpilog(CallStack.front());
 }
 
 void PerfReader::parseHybridSample(TraceStream &TraceIt) {

From 10712791a9affbea8e6fa474d8a857ea6dfbb955 Mon Sep 17 00:00:00 2001
From: Wenlei He <aktoon@gmail.com>
Date: Wed, 3 Feb 2021 13:27:35 -0800
Subject: [PATCH 142/244] [CSSPGO] Use merged base profile for hot threshold
 calculation

Context-sensitive profiles effectively split a function profile into many copies, each representing the CFG profile of a particular calling context. That makes the count distribution look flatter, as we now have more function profiles each with lower counts, which in turn leads to lower hot thresholds. Now we tell the threshold computation to merge context profiles first before calculating percentile-based cutoffs, to compensate for the seemingly flat context profile. This can be controlled by the switch `profile-summary-contextless`.

Earlier measurement showed ~0.4% perf boost with this tuning on spec2k6 for CSSPGO (with pseudo-probe and new inliner).

Differential Revision: https://reviews.llvm.org/D95980
---
 llvm/include/llvm/ProfileData/ProfileCommon.h |   3 +
 .../lib/ProfileData/ProfileSummaryBuilder.cpp |  34 ++++
 llvm/lib/ProfileData/SampleProfReader.cpp     |   6 +-
 llvm/lib/ProfileData/SampleProfWriter.cpp     |   6 +-
 .../Transforms/SampleProfile/csspgo-inline.ll |   1 -
 .../SampleProfile/csspgo-summary.ll           | 153 ++++++++++++++++++
 6 files changed, 192 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/csspgo-summary.ll

diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h
index 6bb5825339ae..55b94b2e690d 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Error.h"
 #include <algorithm>
 #include <cstdint>
@@ -89,6 +90,8 @@ class SampleProfileSummaryBuilder final : public ProfileSummaryBuilder {
 
   void addRecord(const sampleprof::FunctionSamples &FS,
                  bool isCallsiteSample = false);
+  std::unique_ptr<ProfileSummary> computeSummaryForProfiles(
+      const StringMap<sampleprof::FunctionSamples> &Profiles);
   std::unique_ptr<ProfileSummary> getSummary();
 };
 
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index d2603097c550..0e03aa50173d 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -18,9 +18,14 @@
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 
+cl::opt<bool> UseContextLessSummary(
+    "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
+    cl::desc("Merge context profiles before calculating thresholds."));
+
 // A set of cutoff values. Each value, when divided by ProfileSummary::Scale
 // (which is 1000000) is a desired percentile of total counts.
 static const uint32_t DefaultCutoffsData[] = {
@@ -111,6 +116,35 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
       MaxFunctionCount, NumCounts, NumFunctions);
 }
 
+std::unique_ptr<ProfileSummary>
+SampleProfileSummaryBuilder::computeSummaryForProfiles(
+    const StringMap<sampleprof::FunctionSamples> &Profiles) {
+  assert(NumFunctions == 0 &&
+         "This can only be called on an empty summary builder");
+  StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
+  const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
+  // For CSSPGO, a context-sensitive profile effectively splits a function
+  // profile into many copies, each representing the CFG profile of a
+  // particular calling context. That makes the count distribution look
+  // flatter, as we now have more function profiles each with lower counts,
+  // which in turn leads to lower hot thresholds. To compensate for that, by
+  // default we merge context profiles before computing the profile summary.
+  if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
+                                !UseContextLessSummary.getNumOccurrences())) {
+    for (const auto &I : Profiles) {
+      ContextLessProfiles[I.second.getName()].merge(I.second);
+    }
+    ProfilesToUse = &ContextLessProfiles;
+  }
+
+  for (const auto &I : *ProfilesToUse) {
+    const sampleprof::FunctionSamples &Profile = I.second;
+    addRecord(Profile);
+  }
+
+  return getSummary();
+}
+
 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
   computeDetailedSummary();
   return std::make_unique<ProfileSummary>(
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 370ffc8e2885..38cbca844c87 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -1610,9 +1610,5 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
 // profile. Binary format has the profile summary in its header.
 void SampleProfileReader::computeSummary() {
   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
-  for (const auto &I : Profiles) {
-    const FunctionSamples &Profile = I.second;
-    Builder.addRecord(Profile);
-  }
-  Summary = Builder.getSummary();
+  Summary = Builder.computeSummaryForProfiles(Profiles);
 }
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index d3bc05e06fdf..b388b78dfaca 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -752,9 +752,5 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
 void SampleProfileWriter::computeSummary(
     const StringMap<FunctionSamples> &ProfileMap) {
   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
-  for (const auto &I : ProfileMap) {
-    const FunctionSamples &Profile = I.second;
-    Builder.addRecord(Profile);
-  }
-  Summary = Builder.getSummary();
+  Summary = Builder.computeSummaryForProfiles(ProfileMap);
 }
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
index 14e916d8c2e8..8303ac299318 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
@@ -30,7 +30,6 @@
 
 ; INLINE-NEW-LIMIT1-NOT: remark
 
-; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: _Z8funcLeafi inlined into _Z5funcAi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11
 ; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
 ; INLINE-NEW-LIMIT2-NOT: remark
 
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-summary.ll b/llvm/test/Transforms/SampleProfile/csspgo-summary.ll
new file mode 100644
index 000000000000..42ecf399abdb
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/csspgo-summary.ll
@@ -0,0 +1,153 @@
+; Test for CSSPGO's profile summary computation with and without pre-merging context profiles
+
+; RUN: opt < %s -passes=sample-profile,print-profile-summary -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-summary-cutoff-hot=999900 -profile-sample-accurate -profile-summary-contextless=0 -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=SUMMARY-UNMERGED
+; RUN: opt < %s -passes=sample-profile,print-profile-summary -sample-profile-file=%S/Inputs/profile-context-tracker.prof -profile-summary-cutoff-hot=999900 -profile-sample-accurate -profile-summary-contextless=1 -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=SUMMARY-MERGED
+
+; SUMMARY-UNMERGED: main :hot entry
+; SUMMARY-MERGED-NOT: main :hot entry
+
+
+@factor = dso_local global i32 3, align 4, !dbg !0
+
+define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
+entry:
+  br label %for.body, !dbg !25
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add3, !dbg !27
+
+for.body:                                         ; preds = %for.body, %entry
+  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
+  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
+  %add = add nuw nsw i32 %x.011, 1, !dbg !31
+  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
+  %add2 = add i32 %call, %r.010, !dbg !34
+  %add3 = add i32 %add2, %call1, !dbg !35
+  %dec = add nsw i32 %x.011, -1, !dbg !36
+  %cmp = icmp eq i32 %x.011, 0, !dbg !38
+  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
+}
+
+define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
+entry:
+  %add = add nsw i32 %x, 100000, !dbg !44
+  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
+  ret i32 %call, !dbg !46
+}
+
+define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
+entry:
+  %cmp = icmp sgt i32 %x, 0, !dbg !57
+  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
+
+while.cond2.preheader:                            ; preds = %entry
+  %cmp313 = icmp slt i32 %x, 0, !dbg !60
+  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
+
+while.body:                                       ; preds = %while.body, %entry
+  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
+  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
+  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
+  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
+  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
+  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
+
+while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
+  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
+  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
+  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
+  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
+  %cmp3 = icmp slt i32 %add, 0, !dbg !60
+  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
+
+if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
+  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
+  ret i32 %x.addr.2, !dbg !76
+}
+
+define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
+entry:
+  %sub = add nsw i32 %x, -100000, !dbg !51
+  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
+  ret i32 %call, !dbg !53
+}
+
+declare i32 @_Z3fibi(i32)
+
+attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
+!4 = !{}
+!5 = !{!6, !10, !11}
+!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!12 = !{!0}
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
+!14 = !{i32 7, !"Dwarf Version", i32 4}
+!15 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !{i32 1, !"wchar_size", i32 4}
+!17 = !{!"clang version 11.0.0"}
+!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!9}
+!21 = !{!22, !23}
+!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
+!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
+!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
+!25 = !DILocation(line: 13, column: 3, scope: !26)
+!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
+!27 = !DILocation(line: 17, column: 3, scope: !18)
+!28 = !DILocation(line: 14, column: 10, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
+!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
+!31 = !DILocation(line: 14, column: 29, scope: !29)
+!32 = !DILocation(line: 14, column: 21, scope: !33)
+!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
+!34 = !DILocation(line: 14, column: 19, scope: !29)
+!35 = !DILocation(line: 14, column: 7, scope: !29)
+!36 = !DILocation(line: 13, column: 33, scope: !37)
+!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
+!38 = !DILocation(line: 13, column: 26, scope: !39)
+!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
+!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!44 = !DILocation(line: 27, column: 22, scope: !40)
+!45 = !DILocation(line: 27, column: 11, scope: !40)
+!46 = !DILocation(line: 29, column: 3, scope: !40)
+!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!51 = !DILocation(line: 33, column: 22, scope: !47)
+!52 = !DILocation(line: 33, column: 11, scope: !47)
+!53 = !DILocation(line: 35, column: 3, scope: !47)
+!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!57 = !DILocation(line: 49, column: 9, scope: !58)
+!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
+!59 = !DILocation(line: 49, column: 7, scope: !54)
+!60 = !DILocation(line: 58, column: 14, scope: !61)
+!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
+!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
+!63 = !DILocation(line: 58, column: 5, scope: !61)
+!64 = !DILocation(line: 52, column: 16, scope: !65)
+!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
+!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
+!67 = !DILocation(line: 52, column: 12, scope: !65)
+!68 = !DILocation(line: 52, column: 9, scope: !65)
+!69 = !DILocation(line: 51, column: 14, scope: !70)
+!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
+!71 = !DILocation(line: 51, column: 5, scope: !70)
+!72 = !DILocation(line: 59, column: 16, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
+!74 = !DILocation(line: 59, column: 12, scope: !73)
+!75 = !DILocation(line: 59, column: 9, scope: !73)
+!76 = !DILocation(line: 63, column: 3, scope: !54)

From e8e45f52d0a8268fe3ee2a3a2afc80bc10a47280 Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Sun, 7 Feb 2021 22:49:20 -0800
Subject: [PATCH 143/244] [CSSPGO] Unblock optimizations with pseudo probe
 instrumentation.

The IR/MIR pseudo probe intrinsics don't get materialized into real machine instructions and therefore they don't incur runtime cost directly. However, they come with indirect cost by blocking certain optimizations. Some of the blocking is intentional (such as blocking code merge) for better counts quality, while the rest is accidental. This change unblocks perf-critical optimizations that do not affect counts quality. They include:

1. IR InstCombine, sinking load operation to shorten lifetimes.
2. MIR LiveRangeShrink, similar to #1
3. MIR TwoAddressInstructionPass, i.e., opeq transform
4. MIR function argument copy elision
5. IR stack protection (not perf-critical, but nice to have).

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D95982
---
 llvm/include/llvm/CodeGen/MachineInstr.h      |  7 ++
 llvm/include/llvm/IR/Instruction.h            |  3 +
 llvm/lib/CodeGen/LiveRangeShrink.cpp          |  3 +-
 llvm/lib/CodeGen/MachineInstr.cpp             |  3 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  5 +-
 llvm/lib/CodeGen/StackProtector.cpp           |  2 +-
 .../lib/CodeGen/TwoAddressInstructionPass.cpp |  8 +--
 llvm/lib/IR/Instruction.cpp                   |  4 ++
 llvm/lib/Transforms/IPO/FunctionAttrs.cpp     |  7 ++
 .../Transforms/InstCombine/InstCombinePHI.cpp |  8 ++-
 .../InstCombine/InstructionCombining.cpp      |  7 +-
 .../SampleProfile/pseudo-probe-instcombine.ll | 66 +++++++++++++++++++
 .../SampleProfile/pseudo-probe-instsched.ll   | 33 ++++++++++
 .../SampleProfile/pseudo-probe-peep.ll        | 29 ++++++++
 .../SampleProfile/pseudo-probe-twoaddr.ll     | 37 +++++++++++
 15 files changed, 209 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll

diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 6bbe2d03f9e5..f8d97c2c07a6 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1156,6 +1156,10 @@ class MachineInstr
     return getOpcode() == TargetOpcode::CFI_INSTRUCTION;
   }
 
+  bool isPseudoProbe() const {
+    return getOpcode() == TargetOpcode::PSEUDO_PROBE;
+  }
+  
   // True if the instruction represents a position in the function.
   bool isPosition() const { return isLabel() || isCFIInstruction(); }
 
@@ -1165,6 +1169,9 @@ class MachineInstr
   bool isDebugInstr() const {
     return isDebugValue() || isDebugLabel() || isDebugRef();
   }
+  bool isDebugOrPseudoInstr() const {
+    return isDebugInstr() || isPseudoProbe();
+  }
 
   bool isDebugOffsetImm() const { return getDebugOffset().isImm(); }
 
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 85afaed5225e..b99dc62bbb9d 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -654,6 +654,9 @@ class Instruction : public User,
   /// llvm.lifetime.end marker.
   bool isLifetimeStartOrEnd() const;
 
+  /// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
+  bool isDebugOrPseudoInst() const;
+
   /// Return a pointer to the next non-debug instruction in the same basic
   /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo
   /// operations if \c SkipPseudoOp is true.
diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 26439a656917..7fa14fd902ef 100644
--- a/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
         // If MI has side effects, it should become a barrier for code motion.
         // IOM is rebuild from the next instruction to prevent later
         // instructions from being moved before this MI.
-        if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+        if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
+            Next != MBB.end()) {
           BuildInstOrderMap(Next, IOM);
           SawStore = false;
         }
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 59d98054e3a2..b6cfd7dcbfbc 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1462,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
 }
 
 bool MachineInstr::isLoadFoldBarrier() const {
-  return mayStore() || isCall() || hasUnmodeledSideEffects();
+  return mayStore() || isCall() ||
+         (hasUnmodeledSideEffects() && !isPseudoProbe());
 }
 
 /// allDefsAreDead - Return true if all the defs of this instruction are dead.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6638ff6a6358..a6bd774934ac 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9660,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
       // We will look through cast uses, so ignore them completely.
       if (I.isCast())
         continue;
-      // Ignore debug info intrinsics, they don't escape or store to allocas.
-      if (isa<DbgInfoIntrinsic>(I))
+      // Ignore debug info and pseudo op intrinsics, they don't escape or store
+      // to allocas.
+      if (I.isDebugOrPseudoInst())
         continue;
       // This is an unknown instruction. Assume it escapes or writes to all
       // static alloca operands.
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 0411faabbcc3..8d91afb6e99d 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -192,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
       // Ignore intrinsics that do not become real instructions.
       // TODO: Narrow this to intrinsics that have store-like effects.
       const auto *CI = cast<CallInst>(I);
-      if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
+      if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
         return true;
       break;
     }
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index ecee4aed7f88..2a9132bd2fe0 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -801,8 +801,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
   MachineBasicBlock::iterator KillPos = KillMI;
   ++KillPos;
   for (MachineInstr &OtherMI : make_range(End, KillPos)) {
-    // Debug instructions cannot be counted against the limit.
-    if (OtherMI.isDebugInstr())
+    // Debug or pseudo instructions cannot be counted against the limit.
+    if (OtherMI.isDebugOrPseudoInstr())
       continue;
     if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
       return false;
@@ -974,8 +974,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
   unsigned NumVisited = 0;
   for (MachineInstr &OtherMI :
        make_range(mi, MachineBasicBlock::iterator(KillMI))) {
-    // Debug instructions cannot be counted against the limit.
-    if (OtherMI.isDebugInstr())
+    // Debug or pseudo instructions cannot be counted against the limit.
+    if (OtherMI.isDebugOrPseudoInstr())
       continue;
     if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.
       return false;
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 246180e72172..8e52dd3ddc71 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -651,6 +651,10 @@ bool Instruction::isLifetimeStartOrEnd() const {
   return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
 }
 
+bool Instruction::isDebugOrPseudoInst() const {
+  return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this);
+}
+
 const Instruction *
 Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
   for (const Instruction *I = getNextNode(); I; I = I->getNextNode())
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 256acd7e1d17..6730824e860a 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -149,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
       if (isNoModRef(MRI))
         continue;
 
+      // A pseudo probe call shouldn't change any function attribute since it
+      // doesn't translate to a real instruction. It comes with a memory access
+      // tag to prevent itself from being removed by optimizations while not
+      // blocking other instructions from being optimized.
+      if (isa<PseudoProbeInst>(I))
+        continue;
+
       if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
         // The call could access any memory. If that includes writes, note it.
         if (isModSet(MRI))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index d687ec654438..b211b0813611 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -592,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
   BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
 
   for (++BBI; BBI != E; ++BBI)
-    if (BBI->mayWriteToMemory())
+    if (BBI->mayWriteToMemory()) {
+      // Calls that only access inaccessible memory do not block sinking the
+      // load.
+      if (auto *CB = dyn_cast<CallBase>(BBI))
+        if (CB->onlyAccessesInaccessibleMemory())
+          continue;
       return false;
+    }
 
   // Check for non-address taken alloca.  If not address-taken already, it isn't
   // profitable to do this xform.
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 518e909e8ab4..828fd49524ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3878,9 +3878,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
         }
       }
 
-      // Skip processing debug intrinsics in InstCombine. Processing these call instructions
-      // consumes non-trivial amount of time and provides no value for the optimization.
-      if (!isa<DbgInfoIntrinsic>(Inst)) {
+      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
+      // these call instructions consumes non-trivial amount of time and
+      // provides no value for the optimization.
+      if (!Inst->isDebugOrPseudoInst()) {
         InstrsForInstCombineWorklist.push_back(Inst);
         SeenAliasScopes.analyse(Inst);
       }
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
new file mode 100644
index 000000000000..e5bb7bc541c6
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
@@ -0,0 +1,66 @@
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 }
+%struct.CompAtomExt = type { i32 }
+%struct.CompAtom = type { %class.Vector, float, i16, i8, i8 }
+%class.Vector = type { double, double, double }
+%class.ComputeNonbondedWorkArrays = type { %class.ResizeArray, %class.ResizeArray.0, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray.2, %class.ResizeArray.2 }
+%class.ResizeArray.0 = type { i32 (...)**, %class.ResizeArrayRaw.1* }
+%class.ResizeArrayRaw.1 = type <{ double*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
+%class.ResizeArray = type { i32 (...)**, %class.ResizeArrayRaw* }
+%class.ResizeArrayRaw = type <{ i16*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
+%class.ResizeArray.2 = type { i32 (...)**, %class.ResizeArrayRaw.3* }
+%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
+%class.Pairlists = type { i16*, i32, i32 }
+
+;; Check the minPart4 and minPart assignments are merged.
+; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
+; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
+
+define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
+entry:
+  %savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11
+  %0 = load i32, i32* %savePairlists3, align 8
+  %usePairlists4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 12
+  %1 = load i32, i32* %usePairlists4, align 4
+  %tobool54.not = icmp eq i32 %0, 0
+  br i1 %tobool54.not, label %lor.lhs.false55, label %if.end109
+
+lor.lhs.false55:                                  ; preds = %entry
+  %tobool56.not = icmp eq i32 %1, 0
+  br i1 %tobool56.not, label %if.end109, label %if.end109.thread
+
+if.end109.thread:                                 ; preds = %lor.lhs.false55
+  %minPart4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
+  %2 = load i32, i32* %minPart4, align 4
+  call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 2, i32 0, i64 -1)
+  br label %if.then138
+
+if.end109:                                        ; preds = %lor.lhs.false55, %entry
+  %minPart = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
+  %3 = load i32, i32* %minPart, align 4
+  call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 3, i32 0, i64 -1)
+  %tobool116.not = icmp eq i32 %1, 0
+  br i1 %tobool116.not, label %if.then117, label %if.then138
+
+if.then117:                                       ; preds = %if.end109
+  ret void
+
+if.then138:                                       ; preds = %if.end109.thread, %if.end109
+  %4 = phi i32 [ %2, %if.end109.thread ], [ %3, %if.end109 ]
+  %tobool139.not = icmp eq i32 %4, 0
+  br i1 %tobool139.not, label %if.else147, label %if.then140
+
+if.then140:                                       ; preds = %if.then138
+  ret void
+
+if.else147:                                       ; preds = %if.then138
+  ret void
+}
+
+declare dso_local void @_ZN9Pairlists8addIndexEv() align 2
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll
new file mode 100644
index 000000000000..609af90db610
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll
@@ -0,0 +1,33 @@
+; PR1075
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -pseudo-probe-for-profiling -O3 | FileCheck %s
+
+define float @foo(float %x) #0 {
+  %tmp1 = fmul float %x, 3.000000e+00
+  %tmp3 = fmul float %x, 5.000000e+00
+  %tmp5 = fmul float %x, 7.000000e+00
+  %tmp7 = fmul float %x, 1.100000e+01
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1)
+  %tmp10 = fadd float %tmp1, %tmp3
+  %tmp12 = fadd float %tmp10, %tmp5
+  %tmp14 = fadd float %tmp12, %tmp7
+  ret float %tmp14
+; CHECK: mulss
+; CHECK: mulss
+; CHECK: addss
+; CHECK: mulss
+; CHECK: addss
+; CHECK: mulss
+; CHECK: addss
+; CHECK: ret
+}
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { inaccessiblememonly nounwind willreturn }
+
+!llvm.pseudo_probe_desc = !{!0}
+
+!0 = !{i64 6699318081062747564, i64 4294967295, !"foo", null}
+
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll
new file mode 100644
index 000000000000..a1fb25c95936
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=x86_64-- -stop-after=peephole-opt -o - %s | FileCheck %s
+
+define internal i32 @arc_compare() {
+entry:
+  %0 = load i64, i64* undef, align 8
+  br i1 undef, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+; Check that a register copy has been sunk into the compare instruction.
+; CHECK: %[[#REG:]]:gr64 = IMPLICIT_DEF 
+; CHECK-NOT: %[[#]]:gr64 = MOV64rm %[[#REG]]
+; CHECK: PSEUDO_PROBE 5116412291814990879, 3, 0, 0
+; CHECK: CMP64mr %[[#REG]], 1
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1)
+  %cmp4 = icmp slt i64 %0, undef
+  br i1 %cmp4, label %return, label %if.end6
+
+if.end6:                                          ; preds = %if.end
+  call void @llvm.pseudoprobe(i64 5116412291814990879, i64 5, i32 0, i64 -1)
+  br label %return
+
+return:                                           ; preds = %if.end6, %if.end, %entry
+  ret i32 undef
+}
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll
new file mode 100644
index 000000000000..81f72d3c5871
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll
@@ -0,0 +1,37 @@
+; RUN: llc -stop-after=twoaddressinstruction -mtriple=x86_64-- -o - %s | FileCheck %s
+
+
+define dso_local double @twoaddressinstruction() local_unnamed_addr {
+for.end:
+  %0 = load i64, i64* undef, align 8
+  br label %for.body14.preheader
+
+for.body14.preheader:                             ; preds = %for.end
+  br i1 undef, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14.preheader.new
+
+for.body14.preheader.new:                         ; preds = %for.body14.preheader
+  %unroll_iter136 = and i64 %0, -4
+  br label %for.body14
+
+for.cond25.preheader.loopexit.unr-lcssa:          ; preds = %for.body14, %for.body14.preheader
+  %indvars.iv127.unr = phi i64 [ 1, %for.body14.preheader ], [ %indvars.iv.next128.3, %for.body14 ]
+  ret double undef
+
+for.body14:                                       ; preds = %for.body14, %for.body14.preheader.new
+  %indvars.iv127 = phi i64 [ 1, %for.body14.preheader.new ], [ %indvars.iv.next128.3, %for.body14 ]
+  %niter137 = phi i64 [ %unroll_iter136, %for.body14.preheader.new ], [ %niter137.nsub.3, %for.body14 ]
+  %indvars.iv.next128.3 = add nuw nsw i64 %indvars.iv127, 4
+; CHECK: PSEUDO_PROBE -6878943695821059507, 9, 0, 0
+  call void @llvm.pseudoprobe(i64 -6878943695821059507, i64 9, i32 0, i64 -1)
+;; Check an opeq form of instruction is created.
+; CHECK: %[[#REG:]]:gr64_nosp = COPY killed %[[#]]
+; CHECK: %[[#REG]]:gr64_nosp = nuw ADD64ri8 %[[#REG]], 4, implicit-def dead $eflags
+  %niter137.nsub.3 = add i64 %niter137, -4
+  %niter137.ncmp.3 = icmp eq i64 %niter137.nsub.3, 0
+  br i1 %niter137.ncmp.3, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14
+}
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
\ No newline at end of file

From 1a5bb1e4d540303554c0e891389f699956e5e03b Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Wed, 10 Feb 2021 14:40:47 -0800
Subject: [PATCH 144/244] [CSSPGO] Restrict pseudo probe tests to x86_64 only.

---
 llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll | 4 ++--
 llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll      | 1 +
 llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll   | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll
index 609af90db610..9d89cad43aa7 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll
@@ -1,5 +1,5 @@
-; PR1075
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -pseudo-probe-for-profiling -O3 | FileCheck %s
+; REQUIRES: x86_64-linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-- -pseudo-probe-for-profiling -O3 | FileCheck %s
 
 define float @foo(float %x) #0 {
   %tmp1 = fmul float %x, 3.000000e+00
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll
index a1fb25c95936..d94dac4de95d 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll
@@ -1,3 +1,4 @@
+; REQUIRES: x86_64-linux
 ; RUN: llc -mtriple=x86_64-- -stop-after=peephole-opt -o - %s | FileCheck %s
 
 define internal i32 @arc_compare() {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll
index 81f72d3c5871..31b471ea08fd 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll
@@ -1,3 +1,4 @@
+; REQUIRES: x86_64-linux
 ; RUN: llc -stop-after=twoaddressinstruction -mtriple=x86_64-- -o - %s | FileCheck %s
 
 
From 1f5e2016be9a01e4294dcdd10b3c7b03826b26a1 Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Tue, 9 Feb 2021 09:17:20 -0800
Subject: [PATCH 145/244] [CSSPGO] Process functions in a top-down order on a
 dynamic call graph.

Functions are currently processed by the sample profiler loader in a top-down order defined by the static call graph. The order is being adjusted to be a top-down order based on the input context-sensitive profile. One benefit is that the processing order of caller and callee in one SCC would follow the context order in the profile to favor more inlining. Another benefit is that the processing order of caller and callee through an indirect call (which is not on the static call graph) can be honored which in turn allows for more inlining.

The profile top-down order for SCC is also extended to support non-CS profiles.

Two switches `-mllvm -use-profile-indirect-call-edges` and `-mllvm -use-profile-top-down-order` are being introduced.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D95988
---
 .../Transforms/IPO/SampleContextTracker.h     |  13 +-
 .../Transforms/IPO/SampleContextTracker.cpp   |  32 +++
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 137 ++++++++++++-
 .../Inputs/profile-context-order.prof         |  38 ++++
 .../Inputs/profile-topdown-order.prof         |  36 ++++
 .../SampleProfile/profile-context-order.ll    | 190 ++++++++++++++++++
 .../SampleProfile/profile-topdown-order.ll    | 179 +++++++++++++++++
 7 files changed, 620 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/profile-topdown-order.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/profile-context-order.ll
 create mode 100644 llvm/test/Transforms/SampleProfile/profile-topdown-order.ll

diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index 526e141838c4..da0bdae0eaee 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/ProfileData/SampleProf.h"
@@ -90,6 +91,8 @@ class ContextTrieNode {
 // calling context and the context is identified by path from root to the node.
 class SampleContextTracker {
 public:
+  using ContextSamplesTy = SmallSet<FunctionSamples *, 16>;
+
   SampleContextTracker(StringMap<FunctionSamples> &Profiles);
   // Query context profile for a specific callee with given name at a given
   // call-site. The full context is identified by location of call instruction.
@@ -103,6 +106,9 @@ class SampleContextTracker {
   FunctionSamples *getContextSamplesFor(const DILocation *DIL);
   // Query context profile for a given sample contxt of a function.
   FunctionSamples *getContextSamplesFor(const SampleContext &Context);
+  // Get all context profiles for the given function.
+  ContextSamplesTy &getAllContextSamplesFor(const Function &Func);
+  ContextSamplesTy &getAllContextSamplesFor(StringRef Name);
   // Query base profile for a given function. A base profile is a merged view
   // of all context profiles for contexts that are not inlined.
   FunctionSamples *getBaseSamplesFor(const Function &Func,
@@ -113,6 +119,9 @@ class SampleContextTracker {
   // This makes sure that inlined context profile will be excluded in
   // function's base profile.
   void markContextSamplesInlined(const FunctionSamples *InlinedSamples);
+  void promoteMergeContextSamplesTree(const Instruction &Inst,
+                                      StringRef CalleeName);
+  void addCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
   // Dump the internal context profile trie.
   void dump();
 
@@ -126,8 +135,6 @@ class SampleContextTracker {
   ContextTrieNode *getTopLevelContextNode(StringRef FName);
   ContextTrieNode &addTopLevelContextNode(StringRef FName);
   ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
-  void promoteMergeContextSamplesTree(const Instruction &Inst,
-                                      StringRef CalleeName);
   void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
                         StringRef ContextStrToRemove);
   ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
@@ -135,7 +142,7 @@ class SampleContextTracker {
                                                   StringRef ContextStrToRemove);
 
   // Map from function name to context profiles (excluding base profile)
-  StringMap<SmallSet<FunctionSamples *, 16>> FuncToCtxtProfileSet;
+  StringMap<ContextSamplesTy> FuncToCtxtProfileSet;
 
   // Root node for context trie tree
   ContextTrieNode RootContext;
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 41d7f363e1a4..158fa0771c3b 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -263,6 +263,17 @@ SampleContextTracker::getContextSamplesFor(const SampleContext &Context) {
   return Node->getFunctionSamples();
 }
 
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(const Function &Func) {
+  StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
+  return FuncToCtxtProfileSet[CanonName];
+}
+
+SampleContextTracker::ContextSamplesTy &
+SampleContextTracker::getAllContextSamplesFor(StringRef Name) {
+  return FuncToCtxtProfileSet[Name];
+}
+
 FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
                                                          bool MergeContext) {
   StringRef CanonName = FunctionSamples::getCanonicalFnName(Func);
@@ -550,4 +561,25 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
   return *ToNode;
 }
 
+// Add dynamic call edges from the context profile trie to the call graph.
+void SampleContextTracker::addCallGraphEdges(CallGraph &CG,
+                                             StringMap<Function *> &SymbolMap) {
+  // Add profile call edges to the call graph.
+  std::queue<ContextTrieNode *> NodeQueue;
+  NodeQueue.push(&RootContext);
+  while (!NodeQueue.empty()) {
+    ContextTrieNode *Node = NodeQueue.front();
+    NodeQueue.pop();
+    Function *F = SymbolMap.lookup(Node->getFuncName());
+    for (auto &I : Node->getAllChildContext()) {
+      ContextTrieNode *ChildNode = &I.second;
+      NodeQueue.push(ChildNode);
+      if (F && !F->isDeclaration()) {
+        Function *Callee = SymbolMap.lookup(ChildNode->getFuncName());
+        if (Callee && !Callee->isDeclaration())
+          CG[F]->addCalledFunction(nullptr, CG[Callee]);
+      }
+    }
+  }
+}
 } // namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index b2a9127773c3..a6a419bfe742 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -177,6 +177,16 @@ static cl::opt<bool> ProfileTopDownLoad(
              "order of call graph during sample profile loading. It only "
              "works for new pass manager. "));
 
+static cl::opt<bool> UseProfileIndirectCallEdges(
+    "use-profile-indirect-call-edges", cl::init(true), cl::Hidden,
+    cl::desc("Considering indirect call samples from profile when top-down "
+             "processing functions. Only CSSPGO is supported."));
+
+static cl::opt<bool> UseProfileTopDownOrder(
+    "use-profile-top-down-order", cl::init(false), cl::Hidden,
+    cl::desc("Process functions in one SCC in a top-down order "
+             "based on the input profile."));
+
 static cl::opt<bool> ProfileSizeInline(
     "sample-profile-inline-size", cl::Hidden, cl::init(false),
     cl::desc("Inline cold call sites in profile loader if it's beneficial "
@@ -458,6 +468,8 @@ class SampleProfileLoader {
   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
   void buildEdges(Function &F);
   std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
+  void addCallGraphEdges(CallGraph &CG, const FunctionSamples &Samples);
+  void replaceCallGraphEdges(CallGraph &CG, StringMap<Function *> &SymbolMap);
   bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
   void computeDominanceAndLoopInfo(Function &F);
   void clearFunctionData();
@@ -2278,6 +2290,45 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
                     "Sample Profile loader", false, false)
 
+// Add inlined profile call edges to the call graph.
+void SampleProfileLoader::addCallGraphEdges(CallGraph &CG,
+                                            const FunctionSamples &Samples) {
+  Function *Caller = SymbolMap.lookup(Samples.getFuncName());
+  if (!Caller || Caller->isDeclaration())
+    return;
+
+  // Skip non-inlined call edges which are not important since top down inlining
+  // for non-CS profile is to get more precise profile matching, not to enable
+  // more inlining.
+
+  for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
+    for (const auto &InlinedSamples : CallsiteSamples.second) {
+      Function *Callee = SymbolMap.lookup(InlinedSamples.first);
+      if (Callee && !Callee->isDeclaration())
+        CG[Caller]->addCalledFunction(nullptr, CG[Callee]);
+      addCallGraphEdges(CG, InlinedSamples.second);
+    }
+  }
+}
+
+// Replace call graph edges with dynamic call edges from the profile.
+void SampleProfileLoader::replaceCallGraphEdges(
+    CallGraph &CG, StringMap<Function *> &SymbolMap) {
+  // Remove static call edges from the call graph except for the ones from the
+  // root which make the call graph connected.
+  for (const auto &Node : CG)
+    if (Node.second.get() != CG.getExternalCallingNode())
+      Node.second->removeAllCalledFunctions();
+
+  // Add profile call edges to the call graph.
+  if (ProfileIsCS) {
+    ContextTracker->addCallGraphEdges(CG, SymbolMap);
+  } else {
+    for (const auto &Samples : Reader->getProfiles())
+      addCallGraphEdges(CG, Samples.second);
+  }
+}
+
 std::vector<Function *>
 SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
   std::vector<Function *> FunctionOrderList;
@@ -2300,16 +2351,97 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
   }
 
   assert(&CG->getModule() == &M);
+
+  // Add indirect call edges from profile to augment the static call graph.
+  // Functions will be processed in a top-down order defined by the static call
+  // graph. Adjusting the order by considering indirect call edges from the
+  // profile (which don't exist in the static call graph) can enable the
+  // inlining of indirect call targets by processing the caller before them.
+  // TODO: enable this for non-CS profile and fix the counts returning logic to
+  // have a full support for indirect calls.
+  if (UseProfileIndirectCallEdges && ProfileIsCS) {
+    for (auto &Entry : *CG) {
+      const auto *F = Entry.first;
+      if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
+        continue;
+      auto &AllContexts = ContextTracker->getAllContextSamplesFor(F->getName());
+      if (AllContexts.empty())
+        continue;
+
+      for (const auto &BB : *F) {
+        for (const auto &I : BB.getInstList()) {
+          const auto *CB = dyn_cast<CallBase>(&I);
+          if (!CB || !CB->isIndirectCall())
+            continue;
+          const DebugLoc &DLoc = I.getDebugLoc();
+          if (!DLoc)
+            continue;
+          auto CallSite = FunctionSamples::getCallSiteIdentifier(DLoc);
+          for (FunctionSamples *Samples : AllContexts) {
+            if (auto CallTargets = Samples->findCallTargetMapAt(CallSite)) {
+              for (const auto &Target : CallTargets.get()) {
+                Function *Callee = SymbolMap.lookup(Target.first());
+                if (Callee && !Callee->isDeclaration())
+                  Entry.second->addCalledFunction(nullptr, (*CG)[Callee]);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Compute a top-down order from the profile, used to sort functions in
+  // one SCC later. The static processing order computed for an SCC may not
+  // reflect the call contexts in the context-sensitive profile, thus may cause
+  // potential inlining to be overlooked. The function order in one SCC is being
+  // adjusted to a top-down order based on the profile to favor more inlining.
+  DenseMap<Function *, uint64_t> ProfileOrderMap;
+  if (UseProfileTopDownOrder ||
+      (ProfileIsCS && !UseProfileTopDownOrder.getNumOccurrences())) {
+    // Create a static call graph. The call edges are not important since they
+    // will be replaced by dynamic edges from the profile.
+    CallGraph ProfileCG(M);
+    replaceCallGraphEdges(ProfileCG, SymbolMap);
+    scc_iterator<CallGraph *> CGI = scc_begin(&ProfileCG);
+    uint64_t I = 0;
+    while (!CGI.isAtEnd()) {
+      for (CallGraphNode *Node : *CGI) {
+        if (auto *F = Node->getFunction())
+          ProfileOrderMap[F] = ++I;
+      }
+      ++CGI;
+    }
+  }
+
   scc_iterator<CallGraph *> CGI = scc_begin(CG);
   while (!CGI.isAtEnd()) {
-    for (CallGraphNode *node : *CGI) {
-      auto F = node->getFunction();
+    uint64_t Start = FunctionOrderList.size();
+    for (CallGraphNode *Node : *CGI) {
+      auto *F = Node->getFunction();
       if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
         FunctionOrderList.push_back(F);
     }
+
+    // Sort nodes in SCC based on the profile top-down order.
+    if (!ProfileOrderMap.empty()) {
+      std::stable_sort(FunctionOrderList.begin() + Start,
+                       FunctionOrderList.end(),
+                       [&ProfileOrderMap](Function *Left, Function *Right) {
+                         return ProfileOrderMap[Left] < ProfileOrderMap[Right];
+                       });
+    }
+
     ++CGI;
   }
 
+  LLVM_DEBUG({
+    dbgs() << "Function processing order:\n";
+    for (auto F : reverse(FunctionOrderList)) {
+      dbgs() << F->getName() << "\n";
+    }
+  });
+
   std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
   return FunctionOrderList;
 }
@@ -2461,6 +2593,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
 }
 
 bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
+  LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
   DILocation2SampleMap.clear();
   // By default the entry count is initialized to -1, which will be treated
   // conservatively by getEntryCount as the same as unknown (None). This is
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof
new file mode 100644
index 000000000000..f941b5053ee6
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-context-order.prof
@@ -0,0 +1,38 @@
+[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]:1467299:11
+ 0: 6
+ 1: 6
+ 3: 287884
+ 15: 23
+[main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
+ 0: 15
+ 1: 15
+ 3: 74946
+ 10: 23324
+ 15: 11
+[main]:154:0
+ 2: 12
+ 3: 18 _Z5funcAi:11
+ 3.1: 18 _Z5funcBi:19
+[external:12 @ main]:154:12
+ 2: 12
+ 3: 10 _Z5funcAi:7
+ 3.1: 10 _Z5funcBi:11
+[main:3.1 @ _Z5funcBi]:120:19
+ 0: 19
+ 1: 19 _Z8funcLeafi:20
+ 3: 12
+[externalA:17 @ _Z5funcBi]:120:3
+ 0: 3
+ 1: 3
+[external:10 @ _Z5funcBi]:120:10
+ 0: 10
+ 1: 10
+[main:3 @ _Z5funcAi]:99:11
+ 0: 10
+ 1: 10 _Z8funcLeafi:11
+ 2: 287864 _Z3fibi:315608
+ 3: 24
+[main:3 @ _Z5funcAi:2 @ _Z3fibi]:287864:315608
+ 0: 362839
+ 1: 6
+ 3: 287884
\ No newline at end of file
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-topdown-order.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-topdown-order.prof
new file mode 100644
index 000000000000..fa8be3305de0
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-topdown-order.prof
@@ -0,0 +1,36 @@
+_Z8funcLeafi:500853:20
+ 0: 15
+ 1: 15
+ 3: 74946
+ 10: 23324
+ 15: 11
+main:154:0
+ 2: 12
+ 3: 18 _Z5funcAi:11
+ 3.1: 18 _Z5funcBi:19
+main:154:12
+ 2: 12
+ 3: 10 _Z5funcAi:7
+ 3.1: 10 _Z5funcBi:11
+_Z5funcBi:120:19
+ 0: 19
+ 1: 19 _Z8funcLeafi:20
+ 3: 12
+_Z5funcBi:120:3
+ 0: 3
+ 1: 3
+_Z5funcBi:120:10
+ 0: 10
+ 1: 10
+_Z5funcAi:99:11
+ 0: 10
+ 1: _Z8funcLeafi:40
+   0: 6
+   1: 6
+   3: 2
+   15: 23
+ 2: 315608 _Z3fibi:362839
+   0: 315608
+   1: 6
+   3: 287884
+ 3: 24
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-order.ll b/llvm/test/Transforms/SampleProfile/profile-context-order.ll
new file mode 100644
index 000000000000..a75dcc2179ca
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/profile-context-order.ll
@@ -0,0 +1,190 @@
+;; Test for different function processing orders affecting inlining in sample profile loader.
+
+;; There is an SCC _Z5funcAi -> _Z8funcLeafi -> _Z5funcAi in the program.
+;; With -use-profile-top-down-order=0, the top-down processing order of
+;; that SCC is (_Z8funcLeafi, _Z5funcAi), which is determined based on
+;; the static call graph. With -use-profile-top-down-order=1, call edges
+;; from profile are considered, thus the order becomes (_Z5funcAi, _Z8funcLeafi)
+;; which leads to _Z8funcLeafi inlined into _Z5funcAi.
+; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=INLINE
+; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=0 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=NOINLINE
+
+;; There is an indirect call _Z5funcAi -> _Z3fibi in the program.
+;; With -use-profile-indirect-call-edges=0, the processing order computed
+;; based on the static call graph is (_Z3fibi, _Z5funcAi). With
+;; -use-profile-indirect-call-edges=1, the indirect call edge from profile is
+;; considered, thus the order becomes (_Z5funcAi, _Z3fibi) which leads to
+;; _Z3fibi inlined into _Z5funcAi.
+; RUN: opt < %s -passes=sample-profile -use-profile-indirect-call-edges=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=ICALL-INLINE
+; RUN: opt < %s -passes=sample-profile -use-profile-indirect-call-edges=0 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=ICALL-NOINLINE
+
+@factor = dso_local global i32 3, align 4, !dbg !0
+@fp = dso_local global i32 (i32)* null, align 8
+
+define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
+entry:
+  store i32 (i32)* @_Z3fibi, i32 (i32)** @fp, align 8, !dbg !25
+  br label %for.body, !dbg !25
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add3, !dbg !27
+
+for.body:                                         ; preds = %for.body, %entry
+  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
+  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
+  %add = add nuw nsw i32 %x.011, 1, !dbg !31
+  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
+  %add2 = add i32 %call, %r.010, !dbg !34
+  %add3 = add i32 %add2, %call1, !dbg !35
+  %dec = add nsw i32 %x.011, -1, !dbg !36
+  %cmp = icmp eq i32 %x.011, 0, !dbg !38
+  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
+}
+
+; INLINE: define dso_local i32 @_Z5funcAi
+; INLINE-NOT: call i32 @_Z8funcLeafi
+; NOINLINE: define dso_local i32 @_Z5funcAi
+; NOINLINE: call i32 @_Z8funcLeafi
+; ICALL-INLINE: define dso_local i32 @_Z5funcAi
+; ICALL-INLINE: call i32 @_Z3foo
+; ICALL-NOINLINE: define dso_local i32 @_Z5funcAi
+; ICALL-NOINLINE-NO: call i32 @_Z3foo
+; ICALL-NOINLINE-NO: call i32 @_Z3fibi
+define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 {
+entry:
+  %add = add nsw i32 %x, 100000, !dbg !44
+  %0 = load i32 (i32)*, i32 (i32)** @fp, align 8
+  %call = call i32 %0(i32 8), !dbg !45
+  %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46
+  ret i32 %call, !dbg !46
+}
+
+; INLINE: define dso_local i32 @_Z8funcLeafi
+; NOINLINE: define dso_local i32 @_Z8funcLeafi
+; ICALL-INLINE: define dso_local i32 @_Z8funcLeafi
+; ICALL-NOINLINE: define dso_local i32 @_Z8funcLeafi
+define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
+entry:
+  %cmp = icmp sgt i32 %x, 0, !dbg !57
+  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
+
+while.cond2.preheader:                            ; preds = %entry
+  %cmp313 = icmp slt i32 %x, 0, !dbg !60
+  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
+
+while.body:                                       ; preds = %while.body, %entry
+  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
+  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
+  %call = tail call i32 @_Z5funcAi(i32 %tmp), !dbg !67
+  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
+  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
+  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
+
+while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
+  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
+  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
+  %call5 = tail call i32 @_Z5funcBi(i32 %tmp1), !dbg !74
+  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
+  %cmp3 = icmp slt i32 %add, 0, !dbg !60
+  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
+
+if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
+  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
+  ret i32 %x.addr.2, !dbg !76
+}
+
+define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
+entry:
+  %sub = add nsw i32 %x, -100000, !dbg !51
+  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
+  ret i32 %call, !dbg !53
+}
+
+define dso_local i32 @_Z3fibi(i32 %x) local_unnamed_addr #1 !dbg !77 {
+entry:
+  %sub = add nsw i32 %x, -100000, !dbg !78
+  %call = tail call i32 @_Z3foo(i32 %sub), !dbg !78
+  ret i32 %sub, !dbg !78
+}
+
+declare i32 @_Z3foo(i32)
+
+attributes #0 = { nofree noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #1 = { nofree nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
+!4 = !{}
+!5 = !{!6, !10, !11}
+!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!12 = !{!0}
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
+!14 = !{i32 7, !"Dwarf Version", i32 4}
+!15 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !{i32 1, !"wchar_size", i32 4}
+!17 = !{!"clang version 11.0.0"}
+!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!9}
+!21 = !{!22, !23}
+!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
+!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
+!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
+!25 = !DILocation(line: 13, column: 3, scope: !26)
+!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
+!27 = !DILocation(line: 17, column: 3, scope: !18)
+!28 = !DILocation(line: 14, column: 10, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
+!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
+!31 = !DILocation(line: 14, column: 29, scope: !29)
+!32 = !DILocation(line: 14, column: 21, scope: !33)
+!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
+!34 = !DILocation(line: 14, column: 19, scope: !29)
+!35 = !DILocation(line: 14, column: 7, scope: !29)
+!36 = !DILocation(line: 13, column: 33, scope: !37)
+!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
+!38 = !DILocation(line: 13, column: 26, scope: !39)
+!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
+!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!44 = !DILocation(line: 26, column: 22, scope: !40)
+!45 = !DILocation(line: 28, column: 11, scope: !40)
+!46 = !DILocation(line: 27, column: 3, scope: !40)
+!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!51 = !DILocation(line: 33, column: 22, scope: !47)
+!52 = !DILocation(line: 33, column: 11, scope: !47)
+!53 = !DILocation(line: 35, column: 3, scope: !47)
+!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!57 = !DILocation(line: 49, column: 9, scope: !58)
+!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
+!59 = !DILocation(line: 49, column: 7, scope: !54)
+!60 = !DILocation(line: 58, column: 14, scope: !61)
+!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
+!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
+!63 = !DILocation(line: 58, column: 5, scope: !61)
+!64 = !DILocation(line: 52, column: 16, scope: !65)
+!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
+!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
+!67 = !DILocation(line: 52, column: 12, scope: !65)
+!68 = !DILocation(line: 52, column: 9, scope: !65)
+!69 = !DILocation(line: 51, column: 14, scope: !70)
+!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
+!71 = !DILocation(line: 51, column: 5, scope: !70)
+!72 = !DILocation(line: 59, column: 16, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
+!74 = !DILocation(line: 59, column: 12, scope: !73)
+!75 = !DILocation(line: 59, column: 9, scope: !73)
+!76 = !DILocation(line: 63, column: 3, scope: !54)
+!77 = distinct !DISubprogram(name: "funcB", linkageName: "_Z3fibi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!78 = !DILocation(line: 33, column: 22, scope: !77)
diff --git a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll
new file mode 100644
index 000000000000..3a343fa3cc8a
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll
@@ -0,0 +1,179 @@
+;; Test for different function processing orders affecting inlining in sample profile loader.
+
+;; There is an SCC _Z5funcAi -> _Z8funcLeafi -> _Z5funcAi in the program.
+;; With -use-profile-top-down-order=0, the top-down processing order of
+;; that SCC is (_Z8funcLeafi, _Z5funcAi), which is determined based on
+;; the static call graph. With -use-profile-top-down-order=1, call edges
+;; from profile are considered, thus the order becomes (_Z5funcAi, _Z8funcLeafi).
+;; While _Z8funcLeafi is not supposed to be inlined, the outlined entry counts
+;; are affected.
+; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=0 -sample-profile-file=%S/Inputs/profile-topdown-order.prof -S | FileCheck %s -check-prefix=STATIC
+; RUN: opt < %s -passes=sample-profile -use-profile-top-down-order=1 -sample-profile-file=%S/Inputs/profile-topdown-order.prof -S | FileCheck %s -check-prefix=DYNAMIC
+
+
+; STATIC:  define dso_local i32 @_Z8funcLeafi{{.*}} !prof ![[#PROF:]]
+; STATIC:  ![[#PROF]] = !{!"function_entry_count", i64 21}
+; DYNAMIC: define dso_local i32 @_Z8funcLeafi{{.*}} !prof ![[#PROF:]]
+; DYNAMIC: ![[#PROF]] = !{!"function_entry_count", i64 27}
+
+@factor = dso_local global i32 3, align 4, !dbg !0
+@fp = dso_local global i32 (i32)* null, align 8
+
+define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
+entry:
+  store i32 (i32)* @_Z3fibi, i32 (i32)** @fp, align 8, !dbg !25
+  br label %for.body, !dbg !25
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add3, !dbg !27
+
+for.body:                                         ; preds = %for.body, %entry
+  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
+  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
+  %add = add nuw nsw i32 %x.011, 1, !dbg !31
+  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
+  %add2 = add i32 %call, %r.010, !dbg !34
+  %add3 = add i32 %add2, %call1, !dbg !35
+  %dec = add nsw i32 %x.011, -1, !dbg !36
+  %cmp = icmp eq i32 %x.011, 0, !dbg !38
+  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
+}
+
+define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 {
+entry:
+  %add = add nsw i32 %x, 100000, !dbg !44
+  %0 = load i32 (i32)*, i32 (i32)** @fp, align 8
+  %call = call i32 %0(i32 8), !dbg !45
+  %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46
+  ret i32 %call, !dbg !46
+}
+
+; INLINE: define dso_local i32 @_Z8funcLeafi
+; NOINLINE: define dso_local i32 @_Z8funcLeafi
+; ICALL-INLINE: define dso_local i32 @_Z8funcLeafi
+; ICALL-NOINLINE: define dso_local i32 @_Z8funcLeafi
+define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
+entry:
+  %cmp = icmp sgt i32 %x, 0, !dbg !57
+  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
+
+while.cond2.preheader:                            ; preds = %entry
+  %cmp313 = icmp slt i32 %x, 0, !dbg !60
+  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
+
+while.body:                                       ; preds = %while.body, %entry
+  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
+  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
+  %call = tail call i32 @_Z5funcAi(i32 %tmp), !dbg !67
+  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
+  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
+  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
+
+while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
+  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
+  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
+  %call5 = tail call i32 @_Z5funcBi(i32 %tmp1), !dbg !74
+  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
+  %cmp3 = icmp slt i32 %add, 0, !dbg !60
+  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
+
+if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
+  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
+  ret i32 %x.addr.2, !dbg !76
+}
+
+define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
+entry:
+  %sub = add nsw i32 %x, -100000, !dbg !51
+  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
+  ret i32 %call, !dbg !53
+}
+
+define dso_local i32 @_Z3fibi(i32 %x) local_unnamed_addr #1 !dbg !77 {
+entry:
+  %sub = add nsw i32 %x, -100000, !dbg !78
+  %call = tail call i32 @_Z3foo(i32 %sub), !dbg !78
+  ret i32 %sub, !dbg !78
+}
+
+declare i32 @_Z3foo(i32)
+
+attributes #0 = { nofree noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+attributes #1 = { nofree nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
+!4 = !{}
+!5 = !{!6, !10, !11}
+!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
+!12 = !{!0}
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
+!14 = !{i32 7, !"Dwarf Version", i32 4}
+!15 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !{i32 1, !"wchar_size", i32 4}
+!17 = !{!"clang version 11.0.0"}
+!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!9}
+!21 = !{!22, !23}
+!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
+!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
+!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
+!25 = !DILocation(line: 13, column: 3, scope: !26)
+!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
+!27 = !DILocation(line: 17, column: 3, scope: !18)
+!28 = !DILocation(line: 14, column: 10, scope: !29)
+!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
+!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
+!31 = !DILocation(line: 14, column: 29, scope: !29)
+!32 = !DILocation(line: 14, column: 21, scope: !33)
+!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
+!34 = !DILocation(line: 14, column: 19, scope: !29)
+!35 = !DILocation(line: 14, column: 7, scope: !29)
+!36 = !DILocation(line: 13, column: 33, scope: !37)
+!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
+!38 = !DILocation(line: 13, column: 26, scope: !39)
+!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
+!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!44 = !DILocation(line: 26, column: 22, scope: !40)
+!45 = !DILocation(line: 28, column: 11, scope: !40)
+!46 = !DILocation(line: 27, column: 3, scope: !40)
+!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!51 = !DILocation(line: 33, column: 22, scope: !47)
+!52 = !DILocation(line: 33, column: 11, scope: !47)
+!53 = !DILocation(line: 35, column: 3, scope: !47)
+!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!57 = !DILocation(line: 49, column: 9, scope: !58)
+!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
+!59 = !DILocation(line: 49, column: 7, scope: !54)
+!60 = !DILocation(line: 58, column: 14, scope: !61)
+!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
+!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
+!63 = !DILocation(line: 58, column: 5, scope: !61)
+!64 = !DILocation(line: 52, column: 16, scope: !65)
+!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
+!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
+!67 = !DILocation(line: 52, column: 12, scope: !65)
+!68 = !DILocation(line: 52, column: 9, scope: !65)
+!69 = !DILocation(line: 51, column: 14, scope: !70)
+!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
+!71 = !DILocation(line: 51, column: 5, scope: !70)
+!72 = !DILocation(line: 59, column: 16, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
+!74 = !DILocation(line: 59, column: 12, scope: !73)
+!75 = !DILocation(line: 59, column: 9, scope: !73)
+!76 = !DILocation(line: 63, column: 3, scope: !54)
+!77 = distinct !DISubprogram(name: "funcB", linkageName: "_Z3fibi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!78 = !DILocation(line: 33, column: 22, scope: !77)

From 989b5c9571922ddfecae78a1351d0c801bfbf97b Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Thu, 11 Feb 2021 14:51:47 -0800
Subject: [PATCH 146/244] Remove test code that causes MSAN failure.

Summary:
The negative test (with the feature being added disabled) caused an MSAN failure, which is exactly what the added feature is supposed to fix. Therefore the negative test code is being removed.
---
 llvm/test/Transforms/SampleProfile/profile-context-order.ll | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/test/Transforms/SampleProfile/profile-context-order.ll b/llvm/test/Transforms/SampleProfile/profile-context-order.ll
index a75dcc2179ca..c99cc15850b7 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-order.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-order.ll
@@ -16,7 +16,6 @@
 ;; considered, thus the order becomes (_Z5funcAi, _Z3fibi) which leads to
 ;; _Z3fibi inlined into _Z5funcAi.
 ; RUN: opt < %s -passes=sample-profile -use-profile-indirect-call-edges=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=ICALL-INLINE
-; RUN: opt < %s -passes=sample-profile -use-profile-indirect-call-edges=0 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=ICALL-NOINLINE
 
 @factor = dso_local global i32 3, align 4, !dbg !0
 @fp = dso_local global i32 (i32)* null, align 8
@@ -48,9 +47,6 @@ for.body:                                         ; preds = %for.body, %entry
 ; NOINLINE: call i32 @_Z8funcLeafi
 ; ICALL-INLINE: define dso_local i32 @_Z5funcAi
 ; ICALL-INLINE: call i32 @_Z3foo
-; ICALL-NOINLINE: define dso_local i32 @_Z5funcAi
-; ICALL-NOINLINE-NO: call i32 @_Z3foo
-; ICALL-NOINLINE-NO: call i32 @_Z3fibi
 define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 {
 entry:
   %add = add nsw i32 %x, 100000, !dbg !44

From beb80ffee6a1a816cfeb4047926f412c1a2456d9 Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Wed, 3 Feb 2021 14:13:06 -0800
Subject: [PATCH 147/244] [CSSPGO][llvm-profgen] Add brackets for context id to
 support extended binary format

To align with https://reviews.llvm.org/D95547, we need to add brackets for context id before initializing the `SampleContext`.

Also added test cases for extended binary format from llvm-profgen side.

Differential Revision: https://reviews.llvm.org/D95929
---
 llvm/lib/ProfileData/SampleProfWriter.cpp     |  5 +-
 .../test/tools/llvm-profgen/cs-extbinary.test | 14 +++++
 .../tools/llvm-profgen/inline-cs-noprobe.test | 11 ++--
 .../llvm-profgen/noinline-cs-noprobe.test     |  2 +
 .../recursion-compression-noprobe.test        | 11 ++--
 .../recursion-compression-pseudoprobe.test    | 63 ++++++++++---------
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 58 ++++++++++-------
 llvm/tools/llvm-profgen/ProfileGenerator.h    | 12 ++--
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    |  4 +-
 9 files changed, 104 insertions(+), 76 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profgen/cs-extbinary.test

diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index b388b78dfaca..8017f2a82804 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -360,10 +360,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
 /// it needs to be parsed by the SampleProfileReaderText class.
 std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
   auto &OS = *OutputStream;
-  if (FunctionSamples::ProfileIsCS)
-    OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples();
-  else
-    OS << S.getName() << ":" << S.getTotalSamples();
+  OS << S.getNameWithContext(true) << ":" << S.getTotalSamples();
   if (Indent == 0)
     OS << ":" << S.getHeadSamples();
   OS << "\n";
diff --git a/llvm/test/tools/llvm-profgen/cs-extbinary.test b/llvm/test/tools/llvm-profgen/cs-extbinary.test
new file mode 100644
index 000000000000..8acce173d405
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/cs-extbinary.test
@@ -0,0 +1,14 @@
+; test for dwarf-based cs profile
+; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t1 --csprof-cold-thres=0
+; RUN: llvm-profdata merge --sample --text --output=%t2 %t1
+; RUN: FileCheck %S/recursion-compression-noprobe.test --input-file %t2
+; RUN: llvm-profdata merge --sample --extbinary --output=%t3 %t2 && llvm-profdata merge --sample --text --output=%t4 %t3
+; RUN: diff -b %t2 %t4
+
+
+; test for probe-based cs profile
+; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t5 --csprof-cold-thres=0
+; RUN: llvm-profdata merge --sample --text --output=%t6 %t5
+; RUN: FileCheck %S/recursion-compression-pseudoprobe.test --input-file %t6
+; RUN: llvm-profdata merge --sample --extbinary --output=%t7 %t6 && llvm-profdata merge --sample --text --output=%t8 %t7
+; RUN: diff -b %t6 %t8
diff --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
index 943832ebef10..d8cc1932f877 100644
--- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
@@ -2,11 +2,11 @@
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:[main:1 @ foo]:44:0
-; CHECK: 2.2: 14
+; CHECK: 2.1: 14
 ; CHECK: 3: 15
-; CHECK: 3.2: 14 bar:14
-; CHECK: 3.4: 1
-; CHECK:[main:1 @ foo:3.2 @ bar]:14:0
+; CHECK: 3.1: 14 bar:14
+; CHECK: 3.2: 1
+; CHECK:[main:1 @ foo:3.1 @ bar]:14:0
 ; CHECK: 1: 14
 
 ; CHECK-UNWINDER: Binary(inline-cs-noprobe.perfbin)'s Range Counter:
@@ -15,10 +15,9 @@
 ; CHECK-UNWINDER:   (67e, 69b): 1
 ; CHECK-UNWINDER:   (67e, 6ad): 13
 ; CHECK-UNWINDER:   (6bd, 6c8): 14
-; CHECK-UNWINDER: main:1 @ foo:3.2 @ bar
+; CHECK-UNWINDER: main:1 @ foo:3.1 @ bar
 ; CHECK-UNWINDER:   (6af, 6bb): 14
 
-
 ; CHECK-UNWINDER: Binary(inline-cs-noprobe.perfbin)'s Branch Counter:
 ; CHECK-UNWINDER: main:1 @ foo
 ; CHECK-UNWINDER:   (69b, 670): 1
diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
index 2e60883afa62..9d5c787e7f92 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
@@ -36,6 +36,8 @@
 
 
+
+
 ; original code:
 ; clang -O0 -g test.c -o a.out
 #include <stdio.h>
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
index 43f495398bb0..03bab8407435 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
@@ -10,16 +10,17 @@
 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0
 ; CHECK-UNCOMPRESS: 1: 11
 ; CHECK-UNCOMPRESS: 2: 1 fa:1
-; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0
-; CHECK-UNCOMPRESS: 1: 1
-; CHECK-UNCOMPRESS: 2: 2 fb:1
 ; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0
 ; CHECK-UNCOMPRESS: 2: 1
 ; CHECK-UNCOMPRESS: 3: 2 fa:1
-; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0
-; CHECK-UNCOMPRESS: 4: 1
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0
+; CHECK-UNCOMPRESS: 1: 1
+; CHECK-UNCOMPRESS: 2: 2 fb:1
 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0
 ; CHECK-UNCOMPRESS: 2: 1 fa:1
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0
+; CHECK-UNCOMPRESS: 4: 1
+
 
 ; CHECK: [main:1 @ foo:3 @ fa]:14:0
 ; CHECK:  1: 1
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
index 0d4e7dbb1dd4..0936e5d615ca 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
@@ -4,11 +4,11 @@
 ; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
-; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
 ; CHECK-UNCOMPRESS:  1: 1
 ; CHECK-UNCOMPRESS:  3: 1
-; CHECK-UNCOMPRESS:  4: 1
-; CHECK-UNCOMPRESS:  7: 1 fb:1
+; CHECK-UNCOMPRESS:  5: 1
+; CHECK-UNCOMPRESS:  8: 1 fa:1
 ; CHECK-UNCOMPRESS:  !CFGChecksum: 120515930909
 ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
 ; CHECK-UNCOMPRESS:  1: 1
@@ -16,28 +16,13 @@
 ; CHECK-UNCOMPRESS:  4: 1
 ; CHECK-UNCOMPRESS:  7: 1 fb:1
 ; CHECK-UNCOMPRESS:  !CFGChecksum: 120515930909
-; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa]:4:1
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1
 ; CHECK-UNCOMPRESS:  1: 1
 ; CHECK-UNCOMPRESS:  3: 1
-; CHECK-UNCOMPRESS:  5: 1
-; CHECK-UNCOMPRESS:  8: 1 fa:1
+; CHECK-UNCOMPRESS:  4: 1
+; CHECK-UNCOMPRESS:  7: 1 fb:1
 ; CHECK-UNCOMPRESS:  !CFGChecksum: 120515930909
-; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
-; CHECK-UNCOMPRESS:  1: 1
-; CHECK-UNCOMPRESS:  3: 1
-; CHECK-UNCOMPRESS:  6: 1 fa:1
-; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
-; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
-; CHECK-UNCOMPRESS:  1: 1
-; CHECK-UNCOMPRESS:  3: 1
-; CHECK-UNCOMPRESS:  6: 1 fa:1
-; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
-; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
-; CHECK-UNCOMPRESS:  1: 1
-; CHECK-UNCOMPRESS:  3: 1
-; CHECK-UNCOMPRESS:  6: 1 fa:1
-; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
-; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
 ; CHECK-UNCOMPRESS:  1: 1
 ; CHECK-UNCOMPRESS:  2: 1
 ; CHECK-UNCOMPRESS:  5: 1 fb:1
@@ -47,11 +32,26 @@
 ; CHECK-UNCOMPRESS:  2: 1
 ; CHECK-UNCOMPRESS:  5: 1 fb:1
 ; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
-; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
 ; CHECK-UNCOMPRESS:  1: 1
 ; CHECK-UNCOMPRESS:  2: 1
 ; CHECK-UNCOMPRESS:  5: 1 fb:1
 ; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  6: 1 fa:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  6: 1 fa:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
+; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
+; CHECK-UNCOMPRESS:  1: 1
+; CHECK-UNCOMPRESS:  3: 1
+; CHECK-UNCOMPRESS:  6: 1 fa:1
+; CHECK-UNCOMPRESS:  !CFGChecksum: 72617220756
 ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb:6 @ fa]:2:1
 ; CHECK-UNCOMPRESS:  1: 1
 ; CHECK-UNCOMPRESS:  3: 1
@@ -74,30 +74,31 @@
 ; CHECK:  4: 1
 ; CHECK:  7: 1 fb:1
 ; CHECK:  !CFGChecksum: 120515930909
-; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
-; CHECK:  1: 1
-; CHECK:  3: 1
-; CHECK:  4: 1
-; CHECK:  7: 1 fb:1
-; CHECK:  !CFGChecksum: 120515930909
 ; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa]:4:1
 ; CHECK:  1: 1
 ; CHECK:  3: 1
 ; CHECK:  5: 1
 ; CHECK:  8: 1 fa:1
 ; CHECK:  !CFGChecksum: 120515930909
-; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa]:4:1
+; CHECK:  1: 1
+; CHECK:  3: 1
+; CHECK:  4: 1
+; CHECK:  7: 1 fb:1
+; CHECK:  !CFGChecksum: 120515930909
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
 ; CHECK:  1: 1
 ; CHECK:  3: 1
 ; CHECK:  6: 1 fa:1
 ; CHECK:  !CFGChecksum: 72617220756
-; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb]:3:1
+; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa:7 @ fb]:3:1
 ; CHECK:  1: 1
 ; CHECK:  3: 1
 ; CHECK:  6: 1 fa:1
 ; CHECK:  !CFGChecksum: 72617220756
 
 
+
 ; CHECK-UNWINDER: Binary(recursion-compression-pseudoprobe.perfbin)'s Range Counter:
 ; CHECK-UNWINDER: main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5
 ; CHECK-UNWINDER:   (7a0, 7a7): 1
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index b2a8d60d5caf..0a7dddc06bfc 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -76,13 +76,16 @@ ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
   return ProfileGenerator;
 }
 
+void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
+                             StringMap<FunctionSamples> &ProfileMap) {
+  Writer->write(ProfileMap);
+}
+
 void ProfileGenerator::write() {
   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
   if (std::error_code EC = WriterOrErr.getError())
     exitWithError(EC, OutputFilename);
-  auto Writer = std::move(WriterOrErr.get());
-  mergeAndTrimColdProfile(ProfileMap);
-  Writer->write(ProfileMap);
+  write(std::move(WriterOrErr.get()), ProfileMap);
 }
 
 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
@@ -188,7 +191,6 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr) {
   if (Ret.second) {
     SampleContext FContext(Ret.first->first(), RawContext);
     FunctionSamples &FProfile = Ret.first->second;
-    FProfile.setName(FContext.getNameWithoutContext());
     FProfile.setContext(FContext);
   }
   return Ret.first->second;
@@ -268,16 +270,15 @@ void CSProfileGenerator::populateFunctionBoundarySamples(
                                            CalleeName, Count);
 
     // Record head sample for called target(callee)
-    // TODO: Cleanup ' @ '
-    std::string CalleeContextId =
-        getCallSite(LeafLoc) + " @ " + CalleeName.str();
+    std::ostringstream OCalleeCtxStr;
     if (ContextId.find(" @ ") != StringRef::npos) {
-      CalleeContextId =
-          ContextId.rsplit(" @ ").first.str() + " @ " + CalleeContextId;
+      OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
+      OCalleeCtxStr << " @ ";
     }
+    OCalleeCtxStr << getCallSite(LeafLoc) << " @ " << CalleeName.str();
 
     FunctionSamples &CalleeProfile =
-        getFunctionProfileForContext(CalleeContextId);
+        getFunctionProfileForContext(OCalleeCtxStr.str());
     assert(Count != 0 && "Unexpected zero weight branch");
     CalleeProfile.addHeadSamples(Count);
   }
@@ -334,8 +335,8 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
       EstimatedCallCount = 1;
     CallerProfile.addCalledTargetSamples(
         CallerLeafFrameLoc.second.LineOffset,
-        CallerLeafFrameLoc.second.Discriminator, CalleeProfile.getName(),
-        EstimatedCallCount);
+        CallerLeafFrameLoc.second.Discriminator,
+        CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
     CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
                                  CallerLeafFrameLoc.second.Discriminator,
                                  EstimatedCallCount);
@@ -362,8 +363,8 @@ void CSProfileGenerator::mergeAndTrimColdProfile(
   // Remove the code profile from ProfileMap and merge them into BaseProileMap
   StringMap<FunctionSamples> BaseProfileMap;
   for (const auto &I : ToRemoveVec) {
-    auto Ret =
-        BaseProfileMap.try_emplace(I.second->getName(), FunctionSamples());
+    auto Ret = BaseProfileMap.try_emplace(
+        I.second->getContext().getNameWithoutContext(), FunctionSamples());
     FunctionSamples &BaseProfile = Ret.first->second;
     BaseProfile.merge(*I.second);
     ProfileMap.erase(I.first);
@@ -378,14 +379,27 @@ void CSProfileGenerator::mergeAndTrimColdProfile(
     // Merge the profile if the original profile exists, otherwise just insert
     // as a new profile
     FunctionSamples &OrigProfile = getFunctionProfileForContext(I.getKey());
-    StringRef TmpName = OrigProfile.getName();
     OrigProfile.merge(I.second);
-    // Should use the name ref from ProfileMap's key to avoid name being freed
-    // from BaseProfileMap
-    OrigProfile.setName(TmpName);
   }
 }
 
+void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
+                               StringMap<FunctionSamples> &ProfileMap) {
+  mergeAndTrimColdProfile(ProfileMap);
+  // Add bracket for context key to support different profile binary format
+  StringMap<FunctionSamples> CxtWithBracketPMap;
+  for (const auto &Item : ProfileMap) {
+    std::string ContextWithBracket = "[" + Item.first().str() + "]";
+    auto Ret = CxtWithBracketPMap.try_emplace(ContextWithBracket, Item.second);
+    assert(Ret.second && "Must be a unique context");
+    SampleContext FContext(Ret.first->first(), RawContext);
+    FunctionSamples &FProfile = Ret.first->second;
+    FProfile.setName(FContext.getNameWithContext(true));
+    FProfile.setContext(FContext);
+  }
+  Writer->write(CxtWithBracketPMap);
+}
+
 // Helper function to extract context prefix string stack
 // Extract context stack for reusing, leaf context stack will
 // be added compressed while looking up function profile
@@ -399,8 +413,7 @@ extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
 }
 
 void PseudoProbeCSProfileGenerator::generateProfile() {
-  // Enable CS and pseudo probe functionalities in SampleProf
-  FunctionSamples::ProfileIsCS = true;
+  // Enable pseudo probe functionalities in SampleProf
   FunctionSamples::ProfileIsProbeBased = true;
   for (const auto &BI : BinarySampleCounters) {
     ProfiledBinary *Binary = BI.first;
@@ -495,8 +508,9 @@ void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
         CallerProfile.addBodySamples(CallerIndex, 0, Count);
         CallerProfile.addTotalSamples(Count);
-        CallerProfile.addCalledTargetSamples(CallerIndex, 0,
-                                             FunctionProfile.getName(), Count);
+        CallerProfile.addCalledTargetSamples(
+            CallerIndex, 0,
+            FunctionProfile.getContext().getNameWithoutContext(), Count);
       }
     }
   }
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 9cb04c4de34d..ff014ed79ce1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -28,11 +28,9 @@ class ProfileGenerator {
   create(const BinarySampleCounterMap &BinarySampleCounters,
          enum PerfScriptType SampleType);
   virtual void generateProfile() = 0;
-  // Merge and trim profile with cold context before serialization,
-  // only eligible for CS profile
-  virtual void
-  mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap){};
   // Use SampleProfileWriter to serialize profile map
+  virtual void write(std::unique_ptr<SampleProfileWriter> Writer,
+                     StringMap<FunctionSamples> &ProfileMap);
   void write();
 
 protected:
@@ -68,8 +66,6 @@ class CSProfileGenerator : public ProfileGenerator {
 
 public:
   void generateProfile() override {
-    // Enable context-sensitive functionalities in SampleProf
-    FunctionSamples::ProfileIsCS = true;
     for (const auto &BI : BinarySampleCounters) {
       ProfiledBinary *Binary = BI.first;
       for (const auto &CI : BI.second) {
@@ -205,7 +201,9 @@ class CSProfileGenerator : public ProfileGenerator {
   FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
   // Merge cold context profile whose total sample is below threshold
   // into base profile.
-  void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap) override;
+  void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap);
+  void write(std::unique_ptr<SampleProfileWriter> Writer,
+             StringMap<FunctionSamples> &ProfileMap) override;
 
 private:
   // Helper function for updating body sample for a leaf location in
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2c6cedf57649..e1549b14bf05 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -11,6 +11,7 @@
 #include "ProfileGenerator.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Demangle/Demangle.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -393,7 +394,8 @@ FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP,
     if (UseCanonicalFnName)
       FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
     LineLocation Line(CallerFrame.Line - CallerFrame.StartLine,
-                      CallerFrame.Discriminator);
+                      DILocation::getBaseDiscriminatorFromDiscriminator(
+                          CallerFrame.Discriminator));
     FrameLocation Callsite(FunctionName.str(), Line);
     CallStack.push_back(Callsite);
   }

From 66873fb695370f5bd333e327ec77e4710c7891c2 Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Tue, 9 Feb 2021 16:41:44 -0800
Subject: [PATCH 148/244] [CSSPGO][llvm-profgen] Renovate perfscript check and
 command line input validation

This includes some changes related to PerfReader's input checking and command line handling:

1) There may be thousands of leading MMAP-event lines in the perfscript for a large workload. In that case, the 4k threshold is not sufficient to determine whether it is a hybrid sample. This change renovates `isHybridPerfScript` by going through the script without any threshold limit, checking whether there is a non-empty call stack immediately followed by an LBR sample. It stops once it finds a valid one.

2) Added several input validations for the command line switches in PerfReader.

3) Changed the command line switch `show-disassembly` to `show-disassembly-only`; it prints to stdout and exits early, which leaves an empty output profile.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D96387
---
 llvm/test/tools/llvm-profgen/disassemble.s    |  2 +-
 .../llvm-profgen/invalid-perfscript.test      |  9 +++
 .../llvm-profgen/pseudoprobe-decoding.test    |  2 +-
 llvm/test/tools/llvm-profgen/symbolize.ll     |  2 +-
 llvm/tools/llvm-profgen/PerfReader.cpp        | 68 ++++++++++++------
 llvm/tools/llvm-profgen/PerfReader.h          | 69 ++++++++++++++-----
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  |  2 +
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    | 26 ++++---
 llvm/tools/llvm-profgen/llvm-profgen.cpp      |  6 +-
 9 files changed, 131 insertions(+), 55 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profgen/invalid-perfscript.test

diff --git a/llvm/test/tools/llvm-profgen/disassemble.s b/llvm/test/tools/llvm-profgen/disassemble.s
index fc85fbe967e0..be03b5a6892b 100644
--- a/llvm/test/tools/llvm-profgen/disassemble.s
+++ b/llvm/test/tools/llvm-profgen/disassemble.s
@@ -1,6 +1,6 @@
 # REQUIRES: x86-registered-target
 # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t
-# RUN: llvm-profgen --binary=%t --perfscript=%s --output=%t1 -show-disassembly -x86-asm-syntax=intel | FileCheck %s --match-full-lines
+# RUN: llvm-profgen --binary=%t --perfscript=%s --output=%t1 -show-disassembly-only -x86-asm-syntax=intel | FileCheck %s --match-full-lines
 
 # CHECK: Disassembly of section .text [0x0, 0x66]:
 # CHECK: <foo1>:
diff --git a/llvm/test/tools/llvm-profgen/invalid-perfscript.test b/llvm/test/tools/llvm-profgen/invalid-perfscript.test
new file mode 100644
index 000000000000..d795f85b1ea3
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/invalid-perfscript.test
@@ -0,0 +1,9 @@
+; RUN: llvm-profgen --perfscript=%s --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t 2>%t1
+; RUN: FileCheck %s --input-file %t1
+
+	          4005dc
+	          400634
+	          400684
+	    7f68c5788793
+
+; XFAIL: *
diff --git a/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test b/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test
index 5feaa97032ab..1d93a06d8e42 100644
--- a/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test
+++ b/llvm/test/tools/llvm-profgen/pseudoprobe-decoding.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%s  --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-pseudo-probe --show-disassembly | FileCheck %s
+; RUN: llvm-profgen --perfscript=%s  --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-pseudo-probe --show-disassembly-only | FileCheck %s
 
 PERF_RECORD_MMAP2 2854748/2854748: [0x400000(0x1000) @ 0 00:1d 123291722 526021]: r-xp /home/inline-cs-pseudoprobe.perfbin
 
diff --git a/llvm/test/tools/llvm-profgen/symbolize.ll b/llvm/test/tools/llvm-profgen/symbolize.ll
index 2fbc59e3d00d..9a436dec4c20 100644
--- a/llvm/test/tools/llvm-profgen/symbolize.ll
+++ b/llvm/test/tools/llvm-profgen/symbolize.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: x86-registered-target
 ; RUN: llc -filetype=obj %s -o %t
-; RUN: llvm-profgen --binary=%t --perfscript=%s --output=%t1 --show-disassembly -x86-asm-syntax=intel --show-source-locations | FileCheck %s --match-full-lines
+; RUN: llvm-profgen --binary=%t --perfscript=%s --output=%t1 --show-disassembly-only -x86-asm-syntax=intel --show-source-locations | FileCheck %s --match-full-lines
 
 ; CHECK: Disassembly of section .text [0x0, 0x4a]:
 ; CHECK: <funcA>:
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e59d8d93381b..2e0b71f38e6d 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -17,6 +17,9 @@ static cl::opt<bool> ShowUnwinderOutput("show-unwinder-output",
                                         cl::ZeroOrMore,
                                         cl::desc("Print unwinder output"));
 
+extern cl::opt<bool> ShowDisassemblyOnly;
+extern cl::opt<bool> ShowSourceLocations;
+
 namespace llvm {
 namespace sampleprof {
 
@@ -230,7 +233,44 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
   return true;
 }
 
-PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames) {
+void PerfReader::validateCommandLine(
+    cl::list<std::string> &BinaryFilenames,
+    cl::list<std::string> &PerfTraceFilenames) {
+  // Allow the invalid perfscript if we only use to show binary disassembly
+  if (!ShowDisassemblyOnly) {
+    for (auto &File : PerfTraceFilenames) {
+      if (!llvm::sys::fs::exists(File)) {
+        std::string Msg = "Input perf script(" + File + ") doesn't exist!";
+        exitWithError(Msg);
+      }
+    }
+  }
+  if (BinaryFilenames.size() > 1) {
+    // TODO: remove this if everything is ready to support multiple binaries.
+    exitWithError(
+        "Currently only support one input binary, multiple binaries' "
+        "profile will be merged in one profile and make profile "
+        "summary info inaccurate. Please use `llvm-perfdata` to merge "
+        "profiles from multiple binaries.");
+  }
+  for (auto &Binary : BinaryFilenames) {
+    if (!llvm::sys::fs::exists(Binary)) {
+      std::string Msg = "Input binary(" + Binary + ") doesn't exist!";
+      exitWithError(Msg);
+    }
+  }
+  if (CSProfileGenerator::MaxCompressionSize < -1) {
+    exitWithError("Value of --compress-recursion should >= -1");
+  }
+  if (ShowSourceLocations && !ShowDisassemblyOnly) {
+    exitWithError("--show-source-locations should work together with "
+                  "--show-disassembly-only!");
+  }
+}
+
+PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames,
+                       cl::list<std::string> &PerfTraceFilenames) {
+  validateCommandLine(BinaryFilenames, PerfTraceFilenames);
   // Load the binaries.
   for (auto Filename : BinaryFilenames)
     loadBinary(Filename, /*AllowNameConflict*/ false);
@@ -591,27 +631,13 @@ void PerfReader::parseAndAggregateTrace(StringRef Filename) {
 
 void PerfReader::checkAndSetPerfType(
     cl::list<std::string> &PerfTraceFilenames) {
-  bool HasHybridPerf = true;
   for (auto FileName : PerfTraceFilenames) {
-    if (!isHybridPerfScript(FileName)) {
-      HasHybridPerf = false;
-      break;
-    }
-  }
-
-  if (HasHybridPerf) {
-    PerfType = PERF_LBR_STACK;
-  } else {
-    // TODO: Support other type of perf script
-    PerfType = PERF_INVILID;
-  }
-
-  if (BinaryTable.size() > 1) {
-    // TODO: remove this if everything is ready to support multiple binaries.
-    exitWithError("Currently only support one input binary, multiple binaries' "
-                  "profile will be merged in one profile and make profile "
-                  "summary info inaccurate. Please use `perfdata` to merge "
-                  "profiles from multiple binaries.");
+    PerfScriptType Type = checkPerfScriptType(FileName);
+    if (Type == PERF_INVALID)
+      exitWithError("Invalid perf script input!");
+    if (PerfType != PERF_UNKNOWN && PerfType != Type)
+      exitWithError("Inconsistent sample among different perf scripts");
+    PerfType = Type;
   }
 }
 
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 7eaa4b846259..b802c212eb46 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -59,9 +59,10 @@ class TraceStream {
 
 // The type of perfscript
 enum PerfScriptType {
-  PERF_INVILID = 0,
-  PERF_LBR = 1,       // Only LBR sample
-  PERF_LBR_STACK = 2, // Hybrid sample including call stack and LBR stack.
+  PERF_UNKNOWN = 0,
+  PERF_INVALID = 1,
+  PERF_LBR = 2,       // Only LBR sample
+  PERF_LBR_STACK = 3, // Hybrid sample including call stack and LBR stack.
 };
 
 // The parsed LBR sample entry.
@@ -502,19 +503,52 @@ using BinarySampleCounterMap =
 class PerfReader {
 
 public:
-  PerfReader(cl::list<std::string> &BinaryFilenames);
-
-  // Hybrid sample(call stack + LBRs) profile traces are seprated by double line
-  // break, search for that within the first 4k charactors to avoid going
-  // through the whole file.
-  static bool isHybridPerfScript(StringRef FileName) {
-    auto BufOrError = MemoryBuffer::getFileOrSTDIN(FileName, 4000);
-    if (!BufOrError)
-      exitWithError(BufOrError.getError(), FileName);
-    auto Buffer = std::move(BufOrError.get());
-    if (Buffer->getBuffer().find("\n\n") == StringRef::npos)
+  PerfReader(cl::list<std::string> &BinaryFilenames,
+             cl::list<std::string> &PerfTraceFilenames);
+
+  // A LBR sample is like:
+  // 0x5c6313f/0x5c63170/P/-/-/0  0x5c630e7/0x5c63130/P/-/-/0 ...
+  // A heuristic for fast detection by checking whether a
+  // leading "  0x" and the '/' exist.
+  static bool isLBRSample(StringRef Line) {
+    if (!Line.startswith(" 0x"))
       return false;
-    return true;
+    if (Line.find('/') != StringRef::npos)
+      return true;
+    return false;
+  }
+
+  // The raw hybird sample is like
+  // e.g.
+  // 	          4005dc    # call stack leaf
+  //	          400634
+  //	          400684    # call stack root
+  // 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
+  //          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
+  // Determine the perfscript contains hybrid samples(call stack + LBRs) by
+  // checking whether there is a non-empty call stack immediately followed by
+  // a LBR sample
+  static PerfScriptType checkPerfScriptType(StringRef FileName) {
+    TraceStream TraceIt(FileName);
+    uint64_t FrameAddr = 0;
+    while (!TraceIt.isAtEoF()) {
+      int32_t Count = 0;
+      while (!TraceIt.isAtEoF() &&
+             !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) {
+        Count++;
+        TraceIt.advance();
+      }
+      if (!TraceIt.isAtEoF()) {
+        if (isLBRSample(TraceIt.getCurrentLine())) {
+          if (Count > 0)
+            return PERF_LBR_STACK;
+          else
+            return PERF_LBR;
+        }
+        TraceIt.advance();
+      }
+    }
+    return PERF_INVALID;
   }
 
   // The parsed MMap event
@@ -540,6 +574,9 @@ class PerfReader {
   }
 
 private:
+  /// Validate the command line input
+  void validateCommandLine(cl::list<std::string> &BinaryFilenames,
+                           cl::list<std::string> &PerfTraceFilenames);
   /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
   /// mapping between the binary name and its memory layout.
   ///
@@ -574,7 +611,7 @@ class PerfReader {
   BinarySampleCounterMap BinarySampleCounters;
   // Samples with the repeating time generated by the perf reader
   AggregatedCounter AggregatedSamples;
-  PerfScriptType PerfType;
+  PerfScriptType PerfType = PERF_UNKNOWN;
 };
 
 } // end namespace sampleprof
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 0a7dddc06bfc..553ea71ea1fc 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -11,6 +11,8 @@
 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                            cl::Required,
                                            cl::desc("Output profile file"));
+static cl::alias OutputA("o", cl::desc("Alias for --output"),
+                         cl::aliasopt(OutputFilename));
 
 static cl::opt<SampleProfileFormat> OutputFormat(
     "format", cl::desc("Format of output profile"), cl::init(SPF_Text),
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index e1549b14bf05..d7588d680cca 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -22,16 +22,15 @@
 using namespace llvm;
 using namespace sampleprof;
 
-static cl::opt<bool> ShowDisassembly("show-disassembly", cl::ReallyHidden,
-                                     cl::init(false), cl::ZeroOrMore,
-                                     cl::desc("Print disassembled code."));
+cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", cl::ReallyHidden,
+                                  cl::init(false), cl::ZeroOrMore,
+                                  cl::desc("Print disassembled code."));
 
-static cl::opt<bool> ShowSourceLocations("show-source-locations",
-                                         cl::ReallyHidden, cl::init(false),
-                                         cl::ZeroOrMore,
-                                         cl::desc("Print source locations."));
+cl::opt<bool> ShowSourceLocations("show-source-locations", cl::ReallyHidden,
+                                  cl::init(false), cl::ZeroOrMore,
+                                  cl::desc("Print source locations."));
 
-static cl::opt<bool> ShowPseudoProbe(
+cl::opt<bool> ShowPseudoProbe(
     "show-pseudo-probe", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore,
     cl::desc("Print pseudo probe section and disassembled info."));
 
@@ -199,7 +198,6 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                                         SectionSymbolsTy &Symbols,
                                         const SectionRef &Section) {
-
   std::size_t SE = Symbols.size();
   uint64_t SectionOffset = Section.getAddress() - PreferredBaseAddress;
   uint64_t SectSize = Section.getSize();
@@ -211,7 +209,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
     return true;
 
   std::string &&SymbolName = Symbols[SI].Name.str();
-  if (ShowDisassembly)
+  if (ShowDisassemblyOnly)
     outs() << '<' << SymbolName << ">:\n";
 
   uint64_t Offset = StartOffset;
@@ -223,7 +221,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                                 Offset + PreferredBaseAddress, nulls()))
       return false;
 
-    if (ShowDisassembly) {
+    if (ShowDisassemblyOnly) {
       if (ShowPseudoProbe) {
         ProbeDecoder.printProbeForAddress(outs(),
                                           Offset + PreferredBaseAddress);
@@ -257,7 +255,7 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
     Offset += Size;
   }
 
-  if (ShowDisassembly)
+  if (ShowDisassemblyOnly)
     outs() << "\n";
 
   FuncStartAddrMap[StartOffset] = Symbols[SI].Name.str();
@@ -323,7 +321,7 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
   for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
     stable_sort(SecSyms.second);
 
-  if (ShowDisassembly)
+  if (ShowDisassemblyOnly)
     outs() << "\nDisassembly of " << FileName << ":\n";
 
   // Dissassemble a text section.
@@ -342,7 +340,7 @@ void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
     // Register the text section.
     TextSections.insert({SectionOffset, SectSize});
 
-    if (ShowDisassembly) {
+    if (ShowDisassemblyOnly) {
       StringRef SectionName = unwrapOrError(Section.getName(), FileName);
       outs() << "\nDisassembly of section " << SectionName;
       outs() << " [" << format("0x%" PRIx64, SectionOffset) << ", "
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
index 0f4d8f015439..081f1bb4fcf4 100644
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -29,6 +29,8 @@ static cl::list<std::string>
                     llvm::cl::MiscFlags::CommaSeparated,
                     cl::desc("Path of profiled binary files"));
 
+extern cl::opt<bool> ShowDisassemblyOnly;
+
 using namespace llvm;
 using namespace sampleprof;
 
@@ -43,7 +45,9 @@ int main(int argc, const char *argv[]) {
   cl::ParseCommandLineOptions(argc, argv, "llvm SPGO profile generator\n");
 
   // Load binaries and parse perf events and samples
-  PerfReader Reader(BinaryFilenames);
+  PerfReader Reader(BinaryFilenames, PerfTraceFilenames);
+  if (ShowDisassemblyOnly)
+    return EXIT_SUCCESS;
   Reader.parsePerfTraces(PerfTraceFilenames);
 
   std::unique_ptr<ProfileGenerator> Generator = ProfileGenerator::create(

From 610b51c04d3ca6b58555fa30ae52ad9762f9cf86 Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Wed, 10 Feb 2021 10:04:39 -0800
Subject: [PATCH 149/244] [CSSPGO][llvm-profgen] Filter out the instructions
 without location info for symbolizer

It appears some instructions don't have debug location info, and the symbolizer will return an empty call stack for them, which will cause a crash later in profile unwinding. We do not actually record sample info for them, so this change just filters out those instructions.

As those instructions would appear at the beginning and end of the instruction list, without them we need to add boundary checks for IP `advance` and `backward`.

Also, for pseudo-probe-based profiles we don't actually need the symbolized location info, so this change uses an empty stack for them instead. This could save half of the binary loading time.

Differential Revision: https://reviews.llvm.org/D96434
---
 .../tools/llvm-profgen/inline-cs-noprobe.test |  4 +--
 .../llvm-profgen/noinline-cs-noprobe.test     |  8 ++---
 .../recursion-compression-noprobe.test        | 30 +++++++++----------
 llvm/tools/llvm-profgen/PerfReader.cpp        |  4 +++
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 21 ++++++++-----
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    | 18 ++++++++---
 llvm/tools/llvm-profgen/ProfiledBinary.h      |  9 ++++--
 7 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
index d8cc1932f877..cb562e347a3e 100644
--- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
@@ -1,12 +1,12 @@
 ; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
-; CHECK:[main:1 @ foo]:44:0
+; CHECK:[main:1 @ foo]:309:0
 ; CHECK: 2.1: 14
 ; CHECK: 3: 15
 ; CHECK: 3.1: 14 bar:14
 ; CHECK: 3.2: 1
-; CHECK:[main:1 @ foo:3.1 @ bar]:14:0
+; CHECK:[main:1 @ foo:3.1 @ bar]:84:0
 ; CHECK: 1: 14
 
 ; CHECK-UNWINDER: Binary(inline-cs-noprobe.perfbin)'s Range Counter:
diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
index 9d5c787e7f92..c5e6dc1111ca 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
@@ -1,15 +1,15 @@
 ; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
-; CHECK:[main:1 @ foo:3 @ bar]:12:3
+; CHECK:[main:1 @ foo]:54:0
+; CHECK: 2: 3
+; CHECK: 3: 3 bar:3
+; CHECK:[main:1 @ foo:3 @ bar]:50:3
 ; CHECK: 0: 3
 ; CHECK: 1: 3
 ; CHECK: 2: 2
 ; CHECK: 4: 1
 ; CHECK: 5: 3
-; CHECK:[main:1 @ foo]:6:0
-; CHECK: 2: 3
-; CHECK: 3: 3 bar:3
 
 ; CHECK-UNWINDER: Binary(noinline-cs-noprobe.perfbin)'s Range Counter:
 ; CHECK-UNWINDER: main:1 @ foo
diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
index 03bab8407435..15bdd870879e 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
@@ -4,38 +4,38 @@
 ; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --csprof-cold-thres=0
 ; RUN: FileCheck %s --input-file %t
 
-; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0
-; CHECK-UNCOMPRESS: 1: 1
-; CHECK-UNCOMPRESS: 2: 13 fb:11
-; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:12:0
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb]:48:0
 ; CHECK-UNCOMPRESS: 1: 11
 ; CHECK-UNCOMPRESS: 2: 1 fa:1
-; CHECK-UNCOMPRESS:[main:1 @ foo]:3:0
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:24:0
+; CHECK-UNCOMPRESS: 1: 1
+; CHECK-UNCOMPRESS: 2: 13 fb:11
+; CHECK-UNCOMPRESS:[main:1 @ foo]:7:0
 ; CHECK-UNCOMPRESS: 2: 1
 ; CHECK-UNCOMPRESS: 3: 2 fa:1
-; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:3:0
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:7:0
 ; CHECK-UNCOMPRESS: 1: 1
 ; CHECK-UNCOMPRESS: 2: 2 fb:1
-; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:2:0
 ; CHECK-UNCOMPRESS: 2: 1 fa:1
-; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:1:0
+; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb:2 @ fa]:2:0
 ; CHECK-UNCOMPRESS: 4: 1
 
 
-; CHECK: [main:1 @ foo:3 @ fa]:14:0
-; CHECK:  1: 1
-; CHECK:  2: 13 fb:11
-; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:12:0
+; CHECK: [main:1 @ foo:3 @ fa:2 @ fb]:48:0
 ; CHECK:  1: 11
 ; CHECK:  2: 1 fa:1
-; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:4:0
+; CHECK: [main:1 @ foo:3 @ fa]:24:0
+; CHECK:  1: 1
+; CHECK:  2: 13 fb:11
+; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa]:9:0
 ; CHECK:  1: 1
 ; CHECK:  2: 2 fb:1
 ; CHECK:  4: 1
-; CHECK: [main:1 @ foo]:3:0
+; CHECK: [main:1 @ foo]:7:0
 ; CHECK:  2: 1
 ; CHECK:  3: 2 fa:1
-; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:0:0
+; CHECK: [main:1 @ foo:3 @ fa:2 @ fb:2 @ fa:2 @ fb]:1:0
 
 
 ; original code:
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 2e0b71f38e6d..1f842008db42 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -93,6 +93,8 @@ std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
   std::shared_ptr<StringBasedCtxKey> KeyStr =
       std::make_shared<StringBasedCtxKey>();
   KeyStr->Context = Binary->getExpandedContextStr(Stack);
+  if (KeyStr->Context.empty())
+    return nullptr;
   KeyStr->genHashCode();
   return KeyStr;
 }
@@ -116,6 +118,8 @@ void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
     return;
 
   std::shared_ptr<ContextKey> Key = Stack.getContextKey();
+  if (Key == nullptr)
+    return;
   auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
   SampleCounter &SCounter = Ret.first->second;
   for (auto &Item : Cur->RangeSamples) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 553ea71ea1fc..4cfadffebb18 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -212,7 +212,6 @@ void CSProfileGenerator::updateBodySamplesforFunctionProfile(
     FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
                                    LeafLoc.second.Discriminator,
                                    Count - PreviousCount);
-    FunctionProfile.addTotalSamples(Count - PreviousCount);
   }
 }
 
@@ -242,9 +241,13 @@ void CSProfileGenerator::populateFunctionBodySamples(
 
     while (IP.Address <= RangeEnd) {
       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
-      const FrameLocation &LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
-      // Recording body sample for this specific context
-      updateBodySamplesforFunctionProfile(FunctionProfile, LeafLoc, Count);
+      auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
+      if (LeafLoc.hasValue()) {
+        // Recording body sample for this specific context
+        updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
+      }
+      // Accumulate total sample count even it's a line with invalid debug info
+      FunctionProfile.addTotalSamples(Count);
       // Move to next IP within the range
       IP.advance();
     }
@@ -266,9 +269,11 @@ void CSProfileGenerator::populateFunctionBoundarySamples(
       continue;
 
     // Record called target sample and its count
-    const FrameLocation &LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
-    FunctionProfile.addCalledTargetSamples(LeafLoc.second.LineOffset,
-                                           LeafLoc.second.Discriminator,
+    auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
+    if (!LeafLoc.hasValue())
+      continue;
+    FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
+                                           LeafLoc->second.Discriminator,
                                            CalleeName, Count);
 
     // Record head sample for called target(callee)
@@ -277,7 +282,7 @@ void CSProfileGenerator::populateFunctionBoundarySamples(
       OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
       OCalleeCtxStr << " @ ";
     }
-    OCalleeCtxStr << getCallSite(LeafLoc) << " @ " << CalleeName.str();
+    OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
 
     FunctionSamples &CalleeProfile =
         getFunctionProfileForContext(OCalleeCtxStr.str());
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index d7588d680cca..df6ef2a7699b 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -119,7 +119,8 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
   const FrameLocationStack &Context2 = getFrameLocationStack(Offset2);
   if (Context1.size() != Context2.size())
     return false;
-
+  if (Context1.empty())
+    return false;
   // The leaf frame contains location within the leaf, and it
   // needs to be remove that as it's not part of the calling context
   return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
@@ -134,6 +135,10 @@ std::string ProfiledBinary::getExpandedContextStr(
   for (auto Address : Stack) {
     uint64_t Offset = virtualAddrToOffset(Address);
     const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
+    // An instruction without a valid debug line will be ignored by sample
+    // processing
+    if (ExpandedContext.empty())
+      return std::string();
     for (const auto &Loc : ExpandedContext) {
       ContextVec.push_back(getCallSite(Loc));
     }
@@ -242,9 +247,14 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
     const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
 
     // Populate a vector of the symbolized callsite at this location
-    InstructionPointer IP(this, Offset);
-    Offset2LocStackMap[Offset] = symbolize(IP, true);
-
+    // We don't need symbolized info for probe-based profile, just use an empty
+    // stack as an entry to indicate a valid binary offset
+    FrameLocationStack SymbolizedCallStack;
+    if (!UsePseudoProbes) {
+      InstructionPointer IP(this, Offset);
+      SymbolizedCallStack = symbolize(IP, true);
+    }
+    Offset2LocStackMap[Offset] = SymbolizedCallStack;
     // Populate address maps.
     CodeAddrs.push_back(Offset);
     if (MCDesc.isCall())
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index f6c7460e186d..ccb1c9d32e46 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -11,6 +11,7 @@
 
 #include "CallContext.h"
 #include "PseudoProbe.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -225,9 +226,11 @@ class ProfiledBinary {
     return FuncStartAddrMap[Offset];
   }
 
-  const FrameLocation &getInlineLeafFrameLoc(uint64_t Offset,
-                                             bool NameOnly = false) {
-    return getFrameLocationStack(Offset).back();
+  Optional<const FrameLocation> getInlineLeafFrameLoc(uint64_t Offset) {
+    const auto &Stack = getFrameLocationStack(Offset);
+    if (Stack.empty())
+      return {};
+    return Stack.back();
   }
 
   // Compare two addresses' inline context

From b5b31112bf63debaa905e42785317a947e696252 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Fri, 19 Feb 2021 21:48:46 -0800
Subject: [PATCH 150/244] [clang] Add -ffinite-loops & -fno-finite-loops
 options.

This cherry-picks the following patches on the release branch:

6280bb4cd80e [clang] Remove redundant condition (NFC).
51bf4c0e6d4c [clang] Add -ffinite-loops & -fno-finite-loops options.
fb4d8fe80701 [clang] Update mustprogress tests

This patch adds 2 new options to control when Clang adds `mustprogress`:

  1. -ffinite-loops: assume all loops are finite; mustprogress is added
     to all loops, regardless of the selected language standard.
  2. -fno-finite-loops: assume no loop is finite; mustprogress is not
     added to any loop or function. We could add mustprogress to
     functions without loops, but we would have to detect that in Clang,
     which is probably not worth it.

Differential Revision: https://reviews.llvm.org/D96850
---
 clang/include/clang/Basic/CodeGenOptions.def |   3 +
 clang/include/clang/Basic/CodeGenOptions.h   |   6 +
 clang/include/clang/Driver/Options.td        |   5 +
 clang/lib/CodeGen/CodeGenFunction.h          |  11 +
 clang/lib/Driver/ToolChains/Clang.cpp        |   3 +
 clang/lib/Frontend/CompilerInvocation.cpp    |   5 +-
 clang/test/CodeGen/attr-mustprogress-0.c     | 184 -----------
 clang/test/CodeGen/attr-mustprogress-0.cpp   | 183 ----------
 clang/test/CodeGen/attr-mustprogress-1.c     | 197 -----------
 clang/test/CodeGen/attr-mustprogress-1.cpp   | 271 ---------------
 clang/test/CodeGen/attr-mustprogress.c       | 221 +++++++++++++
 clang/test/CodeGenCXX/attr-mustprogress.cpp  | 330 +++++++++++++++++++
 12 files changed, 583 insertions(+), 836 deletions(-)
 delete mode 100644 clang/test/CodeGen/attr-mustprogress-0.c
 delete mode 100644 clang/test/CodeGen/attr-mustprogress-0.cpp
 delete mode 100644 clang/test/CodeGen/attr-mustprogress-1.c
 delete mode 100644 clang/test/CodeGen/attr-mustprogress-1.cpp
 create mode 100644 clang/test/CodeGen/attr-mustprogress.c
 create mode 100644 clang/test/CodeGenCXX/attr-mustprogress.cpp

diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 5c8af65326ed..9d53b5b923bb 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -266,6 +266,9 @@ CODEGENOPT(VectorizeLoop     , 1, 0) ///< Run loop vectorizer.
 CODEGENOPT(VectorizeSLP      , 1, 0) ///< Run SLP vectorizer.
 CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
 
+/// Treat loops as finite: language, always, never.
+ENUM_CODEGENOPT(FiniteLoops, FiniteLoopsKind, 2, FiniteLoopsKind::Language)
+
   /// Attempt to use register sized accesses to bit-fields in structures, when
   /// possible.
 CODEGENOPT(UseRegisterSizedBitfieldAccess , 1, 0)
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 73d41e3293c6..c550817f0f69 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -140,6 +140,12 @@ class CodeGenOptions : public CodeGenOptionsBase {
     All,         // Keep all frame pointers.
   };
 
+  enum FiniteLoopsKind {
+    Language, // Not specified, use language standard.
+    Always,   // All loops are assumed to be finite.
+    Never,    // No loop is assumed to be finite.
+  };
+
   /// The code model to use (-mcmodel).
   std::string CodeModel;
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1f6c13d5cc96..817798926650 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2410,6 +2410,11 @@ def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
 defm reroll_loops : BoolFOption<"reroll-loops",
   CodeGenOpts<"RerollLoops">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option], "Turn on loop reroller">, NegFlag<SetFalse>>;
+def ffinite_loops: Flag<["-"],  "ffinite-loops">, Group<f_Group>,
+  HelpText<"Assume all loops are finite.">, Flags<[CC1Option]>;
+def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>,
+  HelpText<"Do not assume that any loop is finite.">, Flags<[CC1Option]>;
+
 def ftrigraphs : Flag<["-"], "ftrigraphs">, Group<f_Group>,
   HelpText<"Process trigraph sequences">, Flags<[CC1Option]>;
 def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group<f_Group>,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 8eb7adbc8fcb..95c0b7b4d7c0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -507,12 +507,23 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   /// True if the C++ Standard Requires Progress.
   bool CPlusPlusWithProgress() {
+    if (CGM.getCodeGenOpts().getFiniteLoops() ==
+        CodeGenOptions::FiniteLoopsKind::Never)
+      return false;
+
     return getLangOpts().CPlusPlus11 || getLangOpts().CPlusPlus14 ||
            getLangOpts().CPlusPlus17 || getLangOpts().CPlusPlus20;
   }
 
   /// True if the C Standard Requires Progress.
   bool CWithProgress() {
+    if (CGM.getCodeGenOpts().getFiniteLoops() ==
+        CodeGenOptions::FiniteLoopsKind::Always)
+      return true;
+    if (CGM.getCodeGenOpts().getFiniteLoops() ==
+        CodeGenOptions::FiniteLoopsKind::Never)
+      return false;
+
     return getLangOpts().C11 || getLangOpts().C17 || getLangOpts().C2x;
   }
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index f8e637974662..1976b48e0f6a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5620,6 +5620,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     if (A->getOption().matches(options::OPT_freroll_loops))
       CmdArgs.push_back("-freroll-loops");
 
+  Args.AddLastArg(CmdArgs, options::OPT_ffinite_loops,
+                  options::OPT_fno_finite_loops);
+
   Args.AddLastArg(CmdArgs, options::OPT_fwritable_strings);
   Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
                   options::OPT_fno_unroll_loops);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 036388ebd355..5c5cf46150e2 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1037,7 +1037,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
   Opts.UnrollLoops =
       Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops,
                    (Opts.OptimizationLevel > 1));
-
   Opts.BinutilsVersion =
       std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
 
@@ -1324,6 +1323,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
 
   Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true);
 
+  if (Args.hasArg(options::OPT_ffinite_loops))
+    Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Always;
+  else if (Args.hasArg(options::OPT_fno_finite_loops))
+    Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never;
   return Success;
 }
 
diff --git a/clang/test/CodeGen/attr-mustprogress-0.c b/clang/test/CodeGen/attr-mustprogress-0.c
deleted file mode 100644
index 2af24e88ceef..000000000000
--- a/clang/test/CodeGen/attr-mustprogress-0.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes
-// RUN: %clang_cc1 -std=c89 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -std=c99 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-
-int a = 0;
-int b = 0;
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @f1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f1() {
-  for (; 1;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @f2(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f2() {
-  for (; a == b;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @F(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK:       for.cond1:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY2:%.*]], label [[FOR_END3:%.*]]
-// CHECK:       for.body2:
-// CHECK-NEXT:    br label [[FOR_COND1]]
-// CHECK:       for.end3:
-// CHECK-NEXT:    ret void
-//
-void F() {
-  for (; 1;) {
-  }
-  for (; a == b;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @w1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_BODY]]
-//
-void w1() {
-  while (1) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @w2(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]]
-// CHECK:       while.end:
-// CHECK-NEXT:    ret void
-//
-void w2() {
-  while (a == b) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @W(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]]
-// CHECK:       while.end:
-// CHECK-NEXT:    br label [[WHILE_BODY2:%.*]]
-// CHECK:       while.body2:
-// CHECK-NEXT:    br label [[WHILE_BODY2]]
-//
-void W() {
-  while (a == b) {
-  }
-  while (1) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @d1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d1() {
-  do {
-  } while (1);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @d2(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d2() {
-  do {
-  } while (a == b);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @D(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    br label [[DO_BODY1:%.*]]
-// CHECK:       do.body1:
-// CHECK-NEXT:    br label [[DO_COND2:%.*]]
-// CHECK:       do.cond2:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY1]], label [[DO_END3:%.*]]
-// CHECK:       do.end3:
-// CHECK-NEXT:    ret void
-//
-void D() {
-  do {
-  } while (1);
-  do {
-  } while (a == b);
-}
diff --git a/clang/test/CodeGen/attr-mustprogress-0.cpp b/clang/test/CodeGen/attr-mustprogress-0.cpp
deleted file mode 100644
index 3a180cc6b5ad..000000000000
--- a/clang/test/CodeGen/attr-mustprogress-0.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes
-// RUN: %clang_cc1 -std=c++98 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-
-int a = 0;
-int b = 0;
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2f1v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f1() {
-  for (; 1;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2f2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f2() {
-  for (; a == b;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z1Fv(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK:       for.cond1:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY2:%.*]], label [[FOR_END3:%.*]]
-// CHECK:       for.body2:
-// CHECK-NEXT:    br label [[FOR_COND1]]
-// CHECK:       for.end3:
-// CHECK-NEXT:    ret void
-//
-void F() {
-  for (; 1;) {
-  }
-  for (; a == b;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2w1v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_BODY]]
-//
-void w1() {
-  while (1) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2w2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]]
-// CHECK:       while.end:
-// CHECK-NEXT:    ret void
-//
-void w2() {
-  while (a == b) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z1Wv(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]]
-// CHECK:       while.end:
-// CHECK-NEXT:    br label [[WHILE_BODY2:%.*]]
-// CHECK:       while.body2:
-// CHECK-NEXT:    br label [[WHILE_BODY2]]
-//
-void W() {
-  while (a == b) {
-  }
-  while (1) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2d1v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d1() {
-  do {
-  } while (1);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2d2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d2() {
-  do {
-  } while (a == b);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z1Dv(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    br label [[DO_BODY1:%.*]]
-// CHECK:       do.body1:
-// CHECK-NEXT:    br label [[DO_COND2:%.*]]
-// CHECK:       do.cond2:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY1]], label [[DO_END3:%.*]]
-// CHECK:       do.end3:
-// CHECK-NEXT:    ret void
-//
-void D() {
-  do {
-  } while (1);
-  do {
-  } while (a == b);
-}
diff --git a/clang/test/CodeGen/attr-mustprogress-1.c b/clang/test/CodeGen/attr-mustprogress-1.c
deleted file mode 100644
index 2ff068b8b90a..000000000000
--- a/clang/test/CodeGen/attr-mustprogress-1.c
+++ /dev/null
@@ -1,197 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes
-// RUN: %clang_cc1 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -std=c11 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -std=c18 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -std=c2x -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-
-int a = 0;
-int b = 0;
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @f0(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NOT:    br label [[FOR_COND]], !llvm.loop !{{.*}}
-//
-void f0() {
-  for (; ;) ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @f1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f1() {
-  for (; 1;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @f2(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]], [[LOOP2:!llvm.loop !.*]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f2() {
-  for (; a == b;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @F(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK:       for.cond1:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY2:%.*]], label [[FOR_END3:%.*]]
-// CHECK:       for.body2:
-// CHECK-NEXT:    br label [[FOR_COND1]], [[LOOP4:!llvm.loop !.*]]
-// CHECK:       for.end3:
-// CHECK-NEXT:    ret void
-//
-void F() {
-  for (; 1;) {
-  }
-  for (; a == b;) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @w1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_BODY]]
-//
-void w1() {
-  while (1) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @w2(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]], [[LOOP5:!llvm.loop !.*]]
-// CHECK:       while.end:
-// CHECK-NEXT:    ret void
-//
-void w2() {
-  while (a == b) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @W(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]], [[LOOP6:!llvm.loop !.*]]
-// CHECK:       while.end:
-// CHECK-NEXT:    br label [[WHILE_BODY2:%.*]]
-// CHECK:       while.body2:
-// CHECK-NEXT:    br label [[WHILE_BODY2]]
-//
-void W() {
-  while (a == b) {
-  }
-  while (1) {
-  }
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @d1(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d1() {
-  do {
-  } while (1);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @d2(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]], [[LOOP7:!llvm.loop !.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d2() {
-  do {
-  } while (a == b);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @D(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    br label [[DO_BODY1:%.*]]
-// CHECK:       do.body1:
-// CHECK-NEXT:    br label [[DO_COND2:%.*]]
-// CHECK:       do.cond2:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY1]], label [[DO_END3:%.*]], [[LOOP8:!llvm.loop !.*]]
-// CHECK:       do.end3:
-// CHECK-NEXT:    ret void
-//
-void D() {
-  do {
-  } while (1);
-  do {
-  } while (a == b);
-}
diff --git a/clang/test/CodeGen/attr-mustprogress-1.cpp b/clang/test/CodeGen/attr-mustprogress-1.cpp
deleted file mode 100644
index 945d74663c6d..000000000000
--- a/clang/test/CodeGen/attr-mustprogress-1.cpp
+++ /dev/null
@@ -1,271 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes
-// RUN: %clang_cc1 -std=c++11 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -std=c++14 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -std=c++17 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-
-int a = 0;
-int b = 0;
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2f0v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NOT:    br label [[FOR_COND]], !llvm.loop !{{.*}}
-void f0() {
-  for (; ;) ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2f1v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f1() {
-  for (; 1;)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone mustprogress
-// CHECK-LABEL: @_Z2f2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]], [[LOOP2:!llvm.loop !.*]]
-// CHECK:       for.end:
-// CHECK-NEXT:    ret void
-//
-void f2() {
-  for (; a == b;)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z1Fv(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]]
-// CHECK:       for.end:
-// CHECK-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK:       for.cond1:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY2:%.*]], label [[FOR_END3:%.*]]
-// CHECK:       for.body2:
-// CHECK-NEXT:    br label [[FOR_COND1]], [[LOOP4:!llvm.loop !.*]]
-// CHECK:       for.end3:
-// CHECK-NEXT:    ret void
-//
-void F() {
-  for (; 1;)
-    ;
-  for (; a == b;)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2F2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK:       for.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// CHECK:       for.body:
-// CHECK-NEXT:    br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]]
-// CHECK:       for.end:
-// CHECK-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK:       for.cond1:
-// CHECK-NEXT:    br i1 true, label [[FOR_BODY2:%.*]], label [[FOR_END3:%.*]]
-// CHECK:       for.body2:
-// CHECK-NEXT:    br label [[FOR_COND1]]
-// CHECK:       for.end3:
-// CHECK-NEXT:    ret void
-//
-void F2() {
-  for (; a == b;)
-    ;
-  for (; 1;)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2w1v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_BODY]]
-//
-void w1() {
-  while (1)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone mustprogress
-// CHECK-LABEL: @_Z2w2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]], [[LOOP6:!llvm.loop !.*]]
-// CHECK:       while.end:
-// CHECK-NEXT:    ret void
-//
-void w2() {
-  while (a == b)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z1Wv(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-// CHECK:       while.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY:%.*]], label [[WHILE_END:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_COND]], [[LOOP7:!llvm.loop !.*]]
-// CHECK:       while.end:
-// CHECK-NEXT:    br label [[WHILE_BODY2:%.*]]
-// CHECK:       while.body2:
-// CHECK-NEXT:    br label [[WHILE_BODY2]]
-//
-void W() {
-  while (a == b)
-    ;
-  while (1)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2W2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
-// CHECK:       while.body:
-// CHECK-NEXT:    br label [[WHILE_BODY]]
-//
-void W2() {
-  while (1)
-    ;
-  while (a == b)
-    ;
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2d1v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d1() {
-  do
-    ;
-  while (1);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone mustprogress
-// CHECK-LABEL: @_Z2d2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]], [[LOOP8:!llvm.loop !.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    ret void
-//
-void d2() {
-  do
-    ;
-  while (a == b);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z1Dv(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY]], label [[DO_END:%.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    br label [[DO_BODY1:%.*]]
-// CHECK:       do.body1:
-// CHECK-NEXT:    br label [[DO_COND2:%.*]]
-// CHECK:       do.cond2:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY1]], label [[DO_END3:%.*]], [[LOOP9:!llvm.loop !.*]]
-// CHECK:       do.end3:
-// CHECK-NEXT:    ret void
-//
-void D() {
-  do
-    ;
-  while (1);
-  do
-    ;
-  while (a == b);
-}
-
-// CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: @_Z2D2v(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[DO_BODY:%.*]]
-// CHECK:       do.body:
-// CHECK-NEXT:    br label [[DO_COND:%.*]]
-// CHECK:       do.cond:
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
-// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]], [[LOOP10:!llvm.loop !.*]]
-// CHECK:       do.end:
-// CHECK-NEXT:    br label [[DO_BODY1:%.*]]
-// CHECK:       do.body1:
-// CHECK-NEXT:    br label [[DO_COND2:%.*]]
-// CHECK:       do.cond2:
-// CHECK-NEXT:    br i1 true, label [[DO_BODY1]], label [[DO_END3:%.*]]
-// CHECK:       do.end3:
-// CHECK-NEXT:    ret void
-//
-void D2() {
-  do
-    ;
-  while (a == b);
-  do
-    ;
-  while (1);
-}
-
diff --git a/clang/test/CodeGen/attr-mustprogress.c b/clang/test/CodeGen/attr-mustprogress.c
new file mode 100644
index 000000000000..1f83cd44b308
--- /dev/null
+++ b/clang/test/CodeGen/attr-mustprogress.c
@@ -0,0 +1,221 @@
+// RUN: %clang_cc1 -std=c99 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,C99 %s
+// RUN: %clang_cc1 -std=c11 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,C11 %s
+// RUN: %clang_cc1 -std=c18 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,C11 %s
+// RUN: %clang_cc1 -std=c2x -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,C11 %s
+// The two runs below pass an explicit -ffinite-loops / -fno-finite-loops on top of -std=c11.
+// RUN: %clang_cc1 -std=c11 -ffinite-loops -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,FINITE %s
+// RUN: %clang_cc1 -std=c11 -fno-finite-loops -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK,C99 %s
+
+int a = 0;
+int b = 0;
+
+// CHECK: datalayout
+//
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @f0(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NOT:     br {{.*}}!llvm.loop
+//
+void f0() {
+  for (; ;) ;
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @f1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NEXT:    br i1 true, label %for.body, label %for.end
+// CHECK:       for.body:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       for.end:
+// CHECK-NEXT:    ret void
+//
+void f1() {
+  for (; 1;) {
+  }
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @f2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %for.body, label %for.end
+// CHECK:       for.body:
+// C99-NOT:       br {{.*}} !llvm.loop
+// C11:           br label %for.cond, !llvm.loop [[LOOP1:!.*]]
+// FINITE:        br label %for.cond, !llvm.loop [[LOOP1:!.*]]
+// CHECK:       for.end:
+// CHECK-NEXT:    ret void
+//
+void f2() {
+  for (; a == b;) {
+  }
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @F(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NEXT:    br i1 true, label %for.body, label %for.end
+// CHECK:       for.body:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       for.end:
+// CHECK-NEXT:    br label %for.cond1
+// CHECK:       for.cond1:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %for.body2, label %for.end3
+// CHECK:       for.body2:
+// C99-NOT:       br {{.*}}, !llvm.loop
+// C11:           br label %for.cond1, !llvm.loop [[LOOP2:!.*]]
+// FINITE:        br label %for.cond1, !llvm.loop [[LOOP2:!.*]]
+// CHECK:       for.end3:
+// CHECK-NEXT:    ret void
+//
+void F() {
+  for (; 1;) {
+  }
+  for (; a == b;) {
+  }
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @w1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %while.body
+// CHECK:       while.body:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+//
+void w1() {
+  while (1) {
+  }
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @w2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %while.cond
+// CHECK:       while.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %while.body, label %while.end
+// CHECK:       while.body:
+// C99-NOT:       br {{.*}}, !llvm.loop
+// C11:           br label %while.cond, !llvm.loop [[LOOP3:!.*]]
+// FINITE:        br label %while.cond, !llvm.loop [[LOOP3:!.*]]
+// CHECK:       while.end:
+// CHECK-NEXT:    ret void
+//
+void w2() {
+  while (a == b) {
+  }
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @W(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %while.cond
+// CHECK:       while.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %while.body, label %while.end
+// CHECK:       while.body:
+// C99-NOT:       br {{.*}} !llvm.loop
+// C11:           br label %while.cond, !llvm.loop [[LOOP4:!.*]]
+// FINITE:        br label %while.cond, !llvm.loop [[LOOP4:!.*]]
+// CHECK:       while.end:
+// CHECK-NEXT:    br label %while.body2
+// CHECK:       while.body2:
+// CHECK-NOT:     br {{.*}} !llvm.loop
+// W() runs a loop with a non-constant condition, then an endless 'while (1)'; only the former may carry loop metadata.
+void W() {
+  while (a == b) {
+  }
+  while (1) {
+  }
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @d1(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %do.body
+// CHECK:       do.body:
+// CHECK-NEXT:    br label %do.cond
+// CHECK:       do.cond:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       do.end:
+// CHECK-NEXT:    ret void
+//
+void d1() {
+  do {
+  } while (1);
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @d2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %do.body
+// CHECK:       do.body:
+// CHECK-NEXT:    br label %do.cond
+// CHECK:       do.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// C99-NOT:       br {{.*}}, !llvm.loop
+// C11:           br i1 [[CMP]], label %do.body, label %do.end, !llvm.loop [[LOOP5:!.*]]
+// FINITE:        br i1 [[CMP]], label %do.body, label %do.end, !llvm.loop [[LOOP5:!.*]]
+// CHECK:       do.end:
+// CHECK-NEXT:    ret void
+//
+void d2() {
+  do {
+  } while (a == b);
+}
+
+// CHECK-NOT: mustprogress
+// CHECK-LABEL: @D(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %do.body
+// CHECK:       do.body:
+// CHECK-NEXT:    br label %do.cond
+// CHECK:       do.cond:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       do.end:
+// CHECK-NEXT:    br label %do.body1
+// CHECK:       do.body1:
+// CHECK-NEXT:    br label %do.cond2
+// CHECK:       do.cond2:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// C99-NOT:       br {{.*}}, !llvm.loop
+// C11:           br i1 [[CMP]], label %do.body1, label %do.end3, !llvm.loop [[LOOP6:!.*]]
+// FINITE:        br i1 [[CMP]], label %do.body1, label %do.end3, !llvm.loop [[LOOP6:!.*]]
+// CHECK:       do.end3:
+// CHECK-NEXT:    ret void
+// D() runs an endless 'do ... while (1)' first, then a loop with a non-constant condition; only the latter may carry loop metadata.
+void D() {
+  do {
+  } while (1);
+  do {
+  } while (a == b);
+}
+
+// C11: [[LOOP1]] = distinct !{[[LOOP1]], [[MP:!.*]]}
+// C11: [[MP]] = !{!"llvm.loop.mustprogress"}
+// C11: [[LOOP2]] = distinct !{[[LOOP2]], [[MP]]}
+// C11: [[LOOP3]] = distinct !{[[LOOP3]], [[MP]]}
+// C11: [[LOOP4]] = distinct !{[[LOOP4]], [[MP]]}
+// C11: [[LOOP5]] = distinct !{[[LOOP5]], [[MP]]}
+// C11: [[LOOP6]] = distinct !{[[LOOP6]], [[MP]]}
diff --git a/clang/test/CodeGenCXX/attr-mustprogress.cpp b/clang/test/CodeGenCXX/attr-mustprogress.cpp
new file mode 100644
index 000000000000..48ac7ad938ba
--- /dev/null
+++ b/clang/test/CodeGenCXX/attr-mustprogress.cpp
@@ -0,0 +1,330 @@
+// RUN: %clang_cc1 -std=c++98 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CXX98 %s
+// RUN: %clang_cc1 -std=c++11 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CXX11 %s
+// RUN: %clang_cc1 -std=c++14 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CXX11 %s
+// RUN: %clang_cc1 -std=c++17 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CXX11 %s
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CXX11 %s
+
+// Make sure -ffinite-loops overrides -std=c++98 for loops.
+// RUN: %clang_cc1 -std=c++98 -ffinite-loops -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK --check-prefix=FINITE %s
+
+// Make sure -fno-finite-loops overrides -std=c++11
+// RUN: %clang_cc1 -std=c++11 -fno-finite-loops -triple=x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK --check-prefix=CXX98 %s
+
+int a = 0;
+int b = 0;
+
+// CHECK: datalayout
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT:     mustprogress
+// CHECK-LABEL: @_Z2f0v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NOT:    br {{.*}} llvm.loop
+void f0() {
+  for (; ;) ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z2f1v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NEXT:    br i1 true, label %for.body, label %for.end
+// CHECK:       for.body:
+// CHECK-NOT:    br {{.*}}, !llvm.loop
+// CHECK:       for.end:
+// CHECK-NEXT:    ret void
+//
+void f1() {
+  for (; 1;)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11:     mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z2f2v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %for.body, label %for.end
+// CHECK:       for.body:
+// CXX98-NOT:    br {{.*}}, !llvm.loop
+// CXX11:        br label %for.cond, !llvm.loop [[LOOP1:!.*]]
+// FINITE-NEXT:   br label %for.cond, !llvm.loop [[LOOP1:!.*]]
+// CHECK:       for.end:
+// CHECK-NEXT:    ret void
+//
+void f2() {
+  for (; a == b;)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z1Fv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NEXT:    br i1 true, label %for.body, label %for.end
+// CHECK:       for.body:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       for.end:
+// CHECK-NEXT:    br label %for.cond1
+// CHECK:       for.cond1:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %for.body2, label %for.end3
+// CHECK:       for.body2:
+// CXX98-NOT:     br {{.*}}, !llvm.loop
+// CXX11-NEXT:    br label %for.cond1, !llvm.loop [[LOOP2:!.*]]
+// FINITE-NEXT:   br label %for.cond1, !llvm.loop [[LOOP2:!.*]]
+// CHECK:       for.end3:
+// CHECK-NEXT:    ret void
+//
+void F() {
+  for (; 1;)
+    ;
+  for (; a == b;)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z2F2v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %for.cond
+// CHECK:       for.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %for.body, label %for.end
+// CHECK:       for.body:
+// CXX98-NOT:     br {{.*}} !llvm.loop
+// CXX11-NEXT:    br label %for.cond, !llvm.loop [[LOOP3:!.*]]
+// FINITE-NEXT:    br label %for.cond, !llvm.loop [[LOOP3:!.*]]
+// CHECK:       for.end:
+// CHECK-NEXT:    br label %for.cond1
+// CHECK:       for.cond1:
+// CHECK-NEXT:    br i1 true, label %for.body2, label %for.end3
+// CHECK:       for.body2:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       for.end3:
+// CHECK-NEXT:    ret void
+//
+void F2() {
+  for (; a == b;)
+    ;
+  for (; 1;)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT:     mustprogress
+// CHECK-LABEL: @_Z2w1v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %while.body
+// CHECK:       while.body:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+//
+void w1() {
+  while (1)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11:     mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z2w2v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %while.cond
+// CHECK:       while.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %while.body, label %while.end
+// CHECK:       while.body:
+// CXX98-NOT:     br {{.*}}, !llvm.loop
+// CXX11-NEXT:    br label %while.cond, !llvm.loop [[LOOP4:!.*]]
+// FINITE-NEXT:   br label %while.cond, !llvm.loop [[LOOP4:!.*]]
+// CHECK:       while.end:
+// CHECK-NEXT:    ret void
+//
+void w2() {
+  while (a == b)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z1Wv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %while.cond
+// CHECK:       while.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    br i1 [[CMP]], label %while.body, label %while.end
+// CHECK:       while.body:
+// CXX98-NOT:     br {{.*}}, !llvm.loop
+// CXX11-NEXT:    br label %while.cond, !llvm.loop [[LOOP5:!.*]]
+// FINITE-NEXT:   br label %while.cond, !llvm.loop [[LOOP5:!.*]]
+// CHECK:       while.end:
+// CHECK-NEXT:    br label %while.body2
+// CHECK:       while.body2:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+//
+void W() {
+  while (a == b)
+    ;
+  while (1)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z2W2v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %while.body
+// CHECK:       while.body:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+//
+void W2() {
+  while (1)
+    ;
+  while (a == b)
+    ;
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT: mustprogress
+// CHECK-LABEL: @_Z2d1v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %do.body
+// CHECK:       do.body:
+// CHECK-NEXT:    br label %do.cond
+// CHECK:       do.cond:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       do.end:
+// CHECK-NEXT:    ret void
+//
+void d1() {
+  do
+    ;
+  while (1);
+}
+
+// CXX98-NOT: mustprogress
+// CXX11:     mustprogress
+// FINITE-NOT:  mustprogress
+// CHECK-LABEL: @_Z2d2v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %do.body
+// CHECK:       do.body:
+// CHECK-NEXT:    br label %do.cond
+// CHECK:       do.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CXX98-NOT:     br {{.*}}, !llvm.loop
+// CXX11-NEXT:    br i1 [[CMP]], label %do.body, label %do.end, !llvm.loop [[LOOP6:!.*]]
+// FINITE-NEXT:   br i1 [[CMP]], label %do.body, label %do.end, !llvm.loop [[LOOP6:!.*]]
+// CHECK:       do.end:
+// CHECK-NEXT:    ret void
+//
+void d2() {
+  do
+    ;
+  while (a == b);
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT:     mustprogress
+// CHECK-LABEL: @_Z1Dv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %do.body
+// CHECK:       do.body:
+// CHECK-NEXT:    br label %do.cond
+// CHECK:       do.cond:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       do.end:
+// CHECK-NEXT:    br label %do.body1
+// CHECK:       do.body1:
+// CHECK-NEXT:    br label %do.cond2
+// CHECK:       do.cond2:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CXX98-NOT:     br {{.*}}, !llvm.loop
+// CXX11-NEXT:    br i1 [[CMP]], label %do.body1, label %do.end3, !llvm.loop [[LOOP7:!.*]]
+// FINITE-NEXT:   br i1 [[CMP]], label %do.body1, label %do.end3, !llvm.loop [[LOOP7:!.*]]
+// CHECK:       do.end3:
+// CHECK-NEXT:    ret void
+//
+void D() {
+  do
+    ;
+  while (1);
+  do
+    ;
+  while (a == b);
+}
+
+// CXX98-NOT: mustprogress
+// CXX11-NOT: mustprogress
+// FINITE-NOT:     mustprogress
+// CHECK-LABEL: @_Z2D2v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    br label %do.body
+// CHECK:       do.body:
+// CHECK-NEXT:    br label %do.cond
+// CHECK:       do.cond:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @b, align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
+// CXX98-NOT:     br {{.*}}, !llvm.loop
+// CXX11-NEXT:    br i1 [[CMP]], label %do.body, label %do.end, !llvm.loop [[LOOP8:!.*]]
+// FINITE-NEXT:   br i1 [[CMP]], label %do.body, label %do.end, !llvm.loop [[LOOP8:!.*]]
+// CHECK:       do.end:
+// CHECK-NEXT:    br label %do.body1
+// CHECK:       do.body1:
+// CHECK-NEXT:    br label %do.cond2
+// CHECK:       do.cond2:
+// CHECK-NOT:     br {{.*}}, !llvm.loop
+// CHECK:       do.end3:
+// CHECK-NEXT:    ret void
+//
+void D2() {
+  do
+    ;
+  while (a == b);
+  do
+    ;
+  while (1);
+}
+
+// CXX11: [[LOOP1]] = distinct !{[[LOOP1]], [[MP:!.*]]}
+// CXX11: [[MP]] = !{!"llvm.loop.mustprogress"}
+// CXX11: [[LOOP2]] = distinct !{[[LOOP2]], [[MP]]}
+// CXX11: [[LOOP3]] = distinct !{[[LOOP3]], [[MP]]}
+// CXX11: [[LOOP4]] = distinct !{[[LOOP4]], [[MP]]}
+// CXX11: [[LOOP5]] = distinct !{[[LOOP5]], [[MP]]}
+// CXX11: [[LOOP6]] = distinct !{[[LOOP6]], [[MP]]}
+// CXX11: [[LOOP7]] = distinct !{[[LOOP7]], [[MP]]}
+// CXX11: [[LOOP8]] = distinct !{[[LOOP8]], [[MP]]}

From bdafd284b291436d3fa4644f585efe3b06363554 Mon Sep 17 00:00:00 2001
From: "William S. Moses" <gh@wsmoses.com>
Date: Wed, 17 Feb 2021 13:54:42 -0500
Subject: [PATCH 151/244] [SROA] Amend failing test from D95826

(cherry picked from commit 892d2822b62ebcaa7aa0b006b5ea4f26593c1618)
---
 llvm/test/Transforms/SROA/tbaa-struct2.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll
index 75f72f4e9963..13075dd84326 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct2.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll
@@ -35,8 +35,8 @@ define double @bar(%struct.Wishart* %wishart) {
 ; CHECK-NEXT:   %tmp.sroa.2.0.copyload = load i32, i32* %tmp.sroa.2.0.waddr.sroa_idx1, align 8, !tbaa.struct !7
 ; CHECK-NEXT:   %tmp.sroa.3.0.waddr.sroa_raw_cast = bitcast %struct.Wishart* %wishart to i8*
 ; CHECK-NEXT:   %tmp.sroa.3.0.waddr.sroa_raw_idx = getelementptr inbounds i8, i8* %tmp.sroa.3.0.waddr.sroa_raw_cast, i64 12
-; CHECK-NEXT:   %tmp.sroa.3.0.tmpaddr.sroa_idx = getelementptr inbounds [4 x i8], [4 x i8]* %tmp.sroa.3, i64 0, i64 0
-; CHECK-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %tmp.sroa.3.0.tmpaddr.sroa_idx, i8* align 4 %tmp.sroa.3.0.waddr.sroa_raw_idx, i64 4, i1 false), !tbaa.struct !8
+; CHECK-NEXT:   %[[sroa_idx:.+]] = getelementptr inbounds [4 x i8], [4 x i8]* %tmp.sroa.3, i64 0, i64 0
+; CHECK-NEXT:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[sroa_idx]], i8* align 4 %tmp.sroa.3.0.waddr.sroa_raw_idx, i64 4, i1 false), !tbaa.struct !8
 ; CHECK-NEXT:   %call = call double @subcall(double %tmp.sroa.0.0.copyload, i32 %tmp.sroa.2.0.copyload)
 ; CHECK-NEXT:   ret double %call
 ; CHECK-NEXT: }
@@ -48,4 +48,4 @@ define double @bar(%struct.Wishart* %wishart) {
 ; CHECK: !5 = !{!6, !6, i64 0}
 ; CHECK: !6 = !{!"int", !{{[0-9]+}}, i64 0}
 ; CHECK: !7 = !{i64 0, i64 4, !5}
-; CHECK: !8 = !{}
\ No newline at end of file
+; CHECK: !8 = !{}

From ee7eaf860cde24a91a5b17390b8ad5eddd05f7f9 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 4 Feb 2021 09:07:44 -0800
Subject: [PATCH 152/244] [llvm-objdump] --source: drop the warning when there
 is no debug info

Warnings have been added for three cases (PR41905): (1) missing debug info, (2)
the source file cannot be found, (3) the debug info points at a line beyond the
end of the file.

(1) is probably less useful. This was brought up once on
http://lists.llvm.org/pipermail/llvm-dev/2020-April/141264.html and two
internal users mentioned to me that it was annoying. (I personally
find the warning confusing, too.)

Users specify --source to get additional information if sources happen to be
available.  If sources are not available, it should be obvious as the output
will have no interleaved source lines. The warning can be especially annoying
when using llvm-objdump -S on a bunch of files.

This patch drops the warning when there is no debug info.
(If LLVMSymbolizer::symbolizeCode returns an `Error`, there will still be
a warning. There is currently no test for an `Error` return value.
The only code path is probably a broken symbol table, but we probably already
emit a warning in that case.)

`source-interleave-prefix.test` has an inappropriate "malformed" test - the test simply has no
.debug_* because new llc does not produce debug info when the filename is empty (invalid).
I have tried tampering with the header of .debug_info/.debug_line but llvm-symbolizer does not warn.
This patch does not intend to add the missing test coverage.

Differential Revision: https://reviews.llvm.org/D88715

(cherry picked from commit eecbb1c77655d38c06e47cf32e2dcc72da45c517)
---
 .../X86/source-interleave-no-debug-info.test  |  6 ++--
 .../X86/source-interleave-prefix.test         |  9 ------
 llvm/tools/llvm-objdump/llvm-objdump.cpp      | 30 ++++++++-----------
 3 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
index 25deaa00243c..89b03d429bfa 100644
--- a/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
@@ -1,15 +1,13 @@
 ## Test that if an object has no debug information, only the disassembly is
-## printed when --source is specified, and that we emit a warning.
+## printed when --source is specified, and that we do not emit a warning.
 
 # RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %p/Inputs/source-interleave.ll > %t.ll
 # RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll
 # RUN: llvm-objcopy --strip-debug %t.o %t2.o
 
 # RUN: llvm-objdump --source %t.o | FileCheck %s --check-prefixes=CHECK,SOURCE
-# RUN: llvm-objdump --source %t2.o 2> %t2.e | FileCheck %s --check-prefixes=CHECK --implicit-check-not='main()'
-# RUN: FileCheck %s --input-file %t2.e --check-prefixes=WARN
+# RUN: llvm-objdump --source %t2.o 2>&1 | FileCheck %s --check-prefixes=CHECK --implicit-check-not='main()' --implicit-check-not=warning:
 
-# WARN:        warning: '{{.*}}2.o': failed to parse debug information
 # CHECK:       0000000000000010 <main>:
 # SOURCE-NEXT: ; int main() {
 # CHECK-NEXT:   10:   55                      pushq   %rbp
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test
index b384c49b350e..23ce55a329ac 100644
--- a/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-prefix.test
@@ -24,15 +24,6 @@
 ; RUN: llvm-objdump --prefix myprefix --source %t-correct-prefix.o 2>&1 | \
 ; RUN:   FileCheck %s --check-prefix=CHECK-BROKEN-PREFIX -DFILE=%t-correct-prefix.o -DPREFIX=myprefix%/p
 
-;; Test malformed input.
-
-; RUN: sed -e "s,SRC_COMPDIR,,g" -e "s,filename: \"source-interleave-x86_64.c\",filename: \"\",g" \
-; RUN:   %p/Inputs/source-interleave.ll > %t-malformed.ll
-; RUN: llc -o %t-malformed.o -filetype=obj -mtriple=x86_64-pc-linux %t-malformed.ll
-; RUN: llvm-objdump --prefix myprefix --source %t-malformed.o 2>&1 | \
-; RUN:   FileCheck %s --check-prefix=CHECK-MALFORMED -DFILE=%t-malformed.o
-; CHECK-MALFORMED: warning: '[[FILE]]': failed to parse debug information for [[FILE]]
-
 ;; Using only a prefix separator is the same as not using the `--prefix` option.
 
 ; RUN: llvm-objdump --prefix / --source %t-missing-prefix.o 2>&1 | \
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 3134f989603a..17128e95727f 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -947,8 +947,8 @@ class SourcePrinter {
   std::unordered_map<std::string, std::vector<StringRef>> LineCache;
   // Keep track of missing sources.
   StringSet<> MissingSources;
-  // Only emit 'no debug info' warning once.
-  bool WarnedNoDebugInfo;
+  // Only emit 'invalid debug info' warning once.
+  bool WarnedInvalidDebugInfo = false;
 
 private:
   bool cacheSource(const DILineInfo& LineInfoFile);
@@ -962,8 +962,7 @@ class SourcePrinter {
 
 public:
   SourcePrinter() = default;
-  SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch)
-      : Obj(Obj), WarnedNoDebugInfo(false) {
+  SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
     symbolize::LLVMSymbolizer::Options SymbolizerOpts;
     SymbolizerOpts.PrintFunctions =
         DILineInfoSpecifier::FunctionNameKind::LinkageName;
@@ -1018,22 +1017,17 @@ void SourcePrinter::printSourceLine(formatted_raw_ostream &OS,
     return;
 
   DILineInfo LineInfo = DILineInfo();
-  auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address);
+  Expected<DILineInfo> ExpectedLineInfo =
+      Symbolizer->symbolizeCode(*Obj, Address);
   std::string ErrorMessage;
-  if (!ExpectedLineInfo)
-    ErrorMessage = toString(ExpectedLineInfo.takeError());
-  else
+  if (ExpectedLineInfo) {
     LineInfo = *ExpectedLineInfo;
-
-  if (LineInfo.FileName == DILineInfo::BadString) {
-    if (!WarnedNoDebugInfo) {
-      std::string Warning =
-          "failed to parse debug information for " + ObjectFilename.str();
-      if (!ErrorMessage.empty())
-        Warning += ": " + ErrorMessage;
-      reportWarning(Warning, ObjectFilename);
-      WarnedNoDebugInfo = true;
-    }
+  } else if (!WarnedInvalidDebugInfo) {
+    WarnedInvalidDebugInfo = true;
+    // TODO Untested.
+    reportWarning("failed to parse debug information: " +
+                      toString(ExpectedLineInfo.takeError()),
+                  ObjectFilename);
   }
 
   if (!Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) {

From 76d5d54f62599d249e0bf2d1b0998451a584c3f3 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Sun, 14 Feb 2021 12:25:56 -0600
Subject: [PATCH 153/244] Avoid use of stack allocations in asynchronous calls

NOTE: This is an adaption of the original patch to be applicable to the
      LLVM 12 release branch. Logic is the same though.

As reported by Guilherme Valarini [0], we used to pass stack allocations
to calls that can nowadays be asynchronous. This is arguably a problem
and it will inevitably result in UB. To remedy the situation we allocate
the locations as part of the AsyncInfoTy object. The lifetime of that
object matches what we need for now. If the synchronization is not tied
to the AsyncInfoTy object anymore we might need to have a different
buffer construct in global space.

This should be back-ported to LLVM 12 but needs slight modifications as
it is based on refactoring patches we do not need to backport.

[0] https://lists.llvm.org/pipermail/openmp-dev/2021-February/003867.html

Differential Revision: https://reviews.llvm.org/D96667
---
 openmp/libomptarget/include/omptarget.h | 10 ++++++++++
 openmp/libomptarget/src/omptarget.cpp   | 15 ++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 9c533944d135..46bb8206efa1 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -14,6 +14,8 @@
 #ifndef _OMPTARGET_H_
 #define _OMPTARGET_H_
 
+#include <deque>
+#include <stddef.h>
 #include <stdint.h>
 #include <stddef.h>
 
@@ -119,10 +121,18 @@ struct __tgt_target_table {
 /// This struct contains information exchanged between different asynchronous
 /// operations for device-dependent optimization and potential synchronization
 struct __tgt_async_info {
+  /// Slots passed to (potentially) asynchronous calls; they live as long as
+  /// this __tgt_async_info object (deque push_back keeps references valid).
+  std::deque<void *> BufferLocations;
+
   // A pointer to a queue-like structure where offloading operations are issued.
   // We assume to use this structure to do synchronization. In CUDA backend, it
   // is CUstream.
   void *Queue = nullptr;
+
+  /// Return a void* reference with a lifetime that is at least as long as this
+  /// __tgt_async_info object. The location can be used as intermediate buffer.
+  void *&getVoidPtrLocation();
 };
 
 /// This struct is a record of non-contiguous information
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index e4b7b18bc70b..37150aae2fe6 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -18,6 +18,13 @@
 #include <cassert>
 #include <vector>
 
+/// Append a fresh null slot to BufferLocations and return a reference to it.
+/// The slot lives as long as this __tgt_async_info object.
+void *&__tgt_async_info::getVoidPtrLocation() {
+  BufferLocations.push_back(nullptr);
+  return BufferLocations.back();
+}
+
 /* All begin addresses for partially mapped structs must be 8-aligned in order
  * to ensure proper alignment of members. E.g.
  *
@@ -415,7 +422,8 @@ int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num,
       DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
          DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
       uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
-      void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
+      void *&TgtPtrBase = async_info_ptr->getVoidPtrLocation();
+      TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
       int rt = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
                                  sizeof(void *), async_info_ptr);
       if (rt != OFFLOAD_SUCCESS) {
@@ -1122,8 +1130,9 @@ static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr,
         DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
         uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
         void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
-        void *PointerTgtPtrBegin = Device.getTgtPtrBegin(
-            HstPtrVal, ArgSizes[I], IsLast, false, IsHostPtr);
+        void *&PointerTgtPtrBegin = AsyncInfo->getVoidPtrLocation();
+        PointerTgtPtrBegin = Device.getTgtPtrBegin(HstPtrVal, ArgSizes[I],
+                                                   IsLast, false, IsHostPtr);
         if (!PointerTgtPtrBegin) {
           DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
              DPxPTR(HstPtrVal));

From a3545a0b0777da773c5e2370622579c44a8f0f63 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 19 Feb 2021 09:06:05 -0500
Subject: [PATCH 154/244] [Analysis][LoopVectorize] do not form reductions of
 pointers

This is a fix for https://llvm.org/PR49215 either before/after
we make a verifier enhancement for vector reductions with D96904.

I'm not sure what the current thinking is for pointer math/logic
in IR. We allow icmp on pointer values. Therefore, we match min/max
patterns, so without this patch, the vectorizer could form a vector
reduction from that sequence.

But the LangRef definitions for min/max and vector reduction
intrinsics do not allow pointer types:
https://llvm.org/docs/LangRef.html#llvm-smax-intrinsic
https://llvm.org/docs/LangRef.html#llvm-vector-reduce-umax-intrinsic

So we would crash/assert at some point - either in IR verification,
in the cost model, or in codegen. If we do want to allow this kind
of transform, we will need to update the LangRef and all of those
parts of the compiler.

Differential Revision: https://reviews.llvm.org/D97047

(cherry picked from commit 5b250a27ec7822aa0a32abb696cb16c2cc60149c)
---
 llvm/lib/Analysis/IVDescriptors.cpp           |  5 ++-
 .../Transforms/LoopVectorize/reduction-ptr.ll | 40 +++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-ptr.ll

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 7f311d8f9a2b..94a24ccf2155 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -243,11 +243,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   if (RecurrenceType->isFloatingPointTy()) {
     if (!isFloatingPointRecurrenceKind(Kind))
       return false;
-  } else {
+  } else if (RecurrenceType->isIntegerTy()) {
     if (!isIntegerRecurrenceKind(Kind))
       return false;
     if (isArithmeticRecurrenceKind(Kind))
       Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+  } else {
+    // Pointer min/max may exist, but it is not supported as a reduction op.
+    return false;
   }
 
   Worklist.push_back(Start);
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll b/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll
new file mode 100644
index 000000000000..5cae61638f31
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/reduction-ptr.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; Reductions of pointer types are not supported.
+
+define void @PR49215(i32* %p, i32* %q) {
+; CHECK-LABEL: @PR49215(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[G:%.*]] = phi i32* [ [[P:%.*]], [[ENTRY]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32* [[Q:%.*]], [[G]]
+; CHECK-NEXT:    [[UMIN]] = select i1 [[CMP2]], i32* [[Q]], i32* [[G]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], undef
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    [[UMIN_LCSSA:%.*]] = phi i32* [ [[UMIN]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[PHI_CAST:%.*]] = ptrtoint i32* [[UMIN_LCSSA]] to i64
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %g = phi i32* [ %p, %entry ], [ %umin, %for.body ]
+  %cmp2 = icmp ult i32* %q, %g
+  %umin = select i1 %cmp2, i32* %q, i32* %g
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, undef
+  br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+  %phi.cast = ptrtoint i32* %umin to i64
+  ret void
+}

From 3444f052006ca2b19052a4599dd9001b01088c25 Mon Sep 17 00:00:00 2001
From: Brad Smith <brad@comstyle.com>
Date: Sat, 20 Feb 2021 20:43:16 -0500
Subject: [PATCH 155/244] [clang][Driver][OpenBSD] libcxx also requires pthread

(cherry picked from commit b42d57a100c5df6ace68f686f5adaabeafe8a0f6)
---
 clang/lib/Driver/ToolChains/OpenBSD.cpp | 1 +
 clang/test/Driver/openbsd.cpp           | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp
index f155d74632f9..e162165b2561 100644
--- a/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -296,6 +296,7 @@ void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args,
 
   CmdArgs.push_back(Profiling ? "-lc++_p" : "-lc++");
   CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi");
+  CmdArgs.push_back(Profiling ? "-lpthread_p" : "-lpthread");
 }
 
 std::string OpenBSD::getCompilerRT(const ArgList &Args,
diff --git a/clang/test/Driver/openbsd.cpp b/clang/test/Driver/openbsd.cpp
index 9293148680c8..23c365d28e7e 100644
--- a/clang/test/Driver/openbsd.cpp
+++ b/clang/test/Driver/openbsd.cpp
@@ -6,7 +6,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-CXX %s
 // RUN: %clangxx %s -### -o %t.o -target arm-unknown-openbsd 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-CXX %s
-// CHECK-CXX: "-lc++" "-lc++abi" "-lm"
+// CHECK-CXX: "-lc++" "-lc++abi" "-lpthread" "-lm"
 
 // RUN: %clangxx %s -### -pg -o %t.o -target amd64-pc-openbsd 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PG-CXX %s
@@ -16,4 +16,4 @@
 // RUN:   | FileCheck --check-prefix=CHECK-PG-CXX %s
 // RUN: %clangxx %s -### -pg -o %t.o -target arm-unknown-openbsd 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-PG-CXX %s
-// CHECK-PG-CXX: "-lc++_p" "-lc++abi_p" "-lm_p"
+// CHECK-PG-CXX: "-lc++_p" "-lc++abi_p" "-lpthread_p" "-lm_p"

From 76e4c93ea42b3d23907611d14e347bfeae8d4b0a Mon Sep 17 00:00:00 2001
From: Conrad Poelman <cpgithub@stellarscience.com>
Date: Tue, 2 Feb 2021 05:59:38 +0100
Subject: [PATCH 156/244] clang-extra: fix incorrect use of std::lock_guard by
 adding variable name (identified by MSVC [[nodiscard]] error)

`std::lock_guard` is an RAII class that needs a variable name whose scope determines the guard's lifetime. This particular usage lacked a variable name, meaning the guard could be destroyed before the line that it was intended to protect.

This line was identified by building clang with the latest MSVC preview release, which declares the std::lock_guard constructor to be `[[nodiscard]]` to draw attention to such issues.

Reviewed By: kadircet

Differential Revision: https://reviews.llvm.org/D95725

(cherry picked from commit 0b70c86e2007d3f32968f0a7d9efe8eab3bf0f0a)
---
 clang-tools-extra/clangd/support/Function.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/support/Function.h b/clang-tools-extra/clangd/support/Function.h
index 2cac1b1e7f67..936800d56985 100644
--- a/clang-tools-extra/clangd/support/Function.h
+++ b/clang-tools-extra/clangd/support/Function.h
@@ -51,7 +51,7 @@ template <typename T> class Event {
     Subscription &operator=(Subscription &&Other) {
       // If *this is active, unsubscribe.
       if (Parent) {
-        std::lock_guard<std::recursive_mutex>(Parent->ListenersMu);
+        std::lock_guard<std::recursive_mutex> Lock(Parent->ListenersMu);
         llvm::erase_if(Parent->Listeners,
                        [&](const std::pair<Listener, unsigned> &P) {
                          return P.second == ListenerID;

From 8eeb3d99933a3246f2d850b807cf54f11a3a8dce Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Sun, 31 Jan 2021 13:53:22 +0100
Subject: [PATCH 157/244] [clangd] Rename: merge index/AST refs
 path-insensitively where needed

If you have c:\foo open, and C:\foo indexed (case difference) then these
need to be considered the same file. Otherwise we emit edits to both,
and editors do... something that isn't pretty.

Maybe more centralized normalization is called for, but it's not trivial
to do this while also being case-preserving. see
https://github.com/clangd/clangd/issues/108

Fixes https://github.com/clangd/clangd/issues/665

Differential Revision: https://reviews.llvm.org/D95759

(cherry picked from commit b63cd4db915c08e0cb4cf668a18de24b67f2c44c)
---
 .../clangd/GlobalCompilationDatabase.cpp      | 14 ------
 clang-tools-extra/clangd/refactor/Rename.cpp  |  4 +-
 .../clangd/support/CMakeLists.txt             |  1 +
 clang-tools-extra/clangd/support/Path.cpp     | 30 ++++++++++++
 clang-tools-extra/clangd/support/Path.h       |  6 +++
 .../clangd/unittests/RenameTests.cpp          | 46 +++++++++++++++++++
 6 files changed, 85 insertions(+), 16 deletions(-)
 create mode 100644 clang-tools-extra/clangd/support/Path.cpp

diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
index 542d0c3e4dbc..a38c8a57d161 100644
--- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
+++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
@@ -396,20 +396,6 @@ DirectoryBasedGlobalCompilationDatabase::getCompileCommand(PathRef File) const {
   return None;
 }
 
-// For platforms where paths are case-insensitive (but case-preserving),
-// we need to do case-insensitive comparisons and use lowercase keys.
-// FIXME: Make Path a real class with desired semantics instead.
-//        This class is not the only place this problem exists.
-// FIXME: Mac filesystems default to case-insensitive, but may be sensitive.
-
-static std::string maybeCaseFoldPath(PathRef Path) {
-#if defined(_WIN32) || defined(__APPLE__)
-  return Path.lower();
-#else
-  return std::string(Path);
-#endif
-}
-
 std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
 DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
     llvm::ArrayRef<llvm::StringRef> Dirs) const {
diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp
index d3c7da96a441..a857b3479871 100644
--- a/clang-tools-extra/clangd/refactor/Rename.cpp
+++ b/clang-tools-extra/clangd/refactor/Rename.cpp
@@ -68,7 +68,7 @@ llvm::Optional<std::string> getOtherRefFile(const Decl &D, StringRef MainFile,
     if (OtherFile)
       return;
     if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) {
-      if (*RefFilePath != MainFile)
+      if (!pathEqual(*RefFilePath, MainFile))
         OtherFile = *RefFilePath;
     }
   });
@@ -474,7 +474,7 @@ findOccurrencesOutsideFile(const NamedDecl &RenameDecl,
     if ((R.Kind & RefKind::Spelled) == RefKind::Unknown)
       return;
     if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) {
-      if (*RefFilePath != MainFile)
+      if (!pathEqual(*RefFilePath, MainFile))
         AffectedFiles[*RefFilePath].push_back(toRange(R.Location));
     }
   });
diff --git a/clang-tools-extra/clangd/support/CMakeLists.txt b/clang-tools-extra/clangd/support/CMakeLists.txt
index f0fe073eb136..fc7d7a28117b 100644
--- a/clang-tools-extra/clangd/support/CMakeLists.txt
+++ b/clang-tools-extra/clangd/support/CMakeLists.txt
@@ -23,6 +23,7 @@ add_clang_library(clangdSupport
   Logger.cpp
   Markup.cpp
   MemoryTree.cpp
+  Path.cpp
   Shutdown.cpp
   Threading.cpp
   ThreadsafeFS.cpp
diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp
new file mode 100644
index 000000000000..f72d00070f34
--- /dev/null
+++ b/clang-tools-extra/clangd/support/Path.cpp
@@ -0,0 +1,30 @@
+//===--- Path.cpp -------------------------------------------*- C++-*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/Path.h"
+namespace clang {
+namespace clangd {
+
+std::string maybeCaseFoldPath(PathRef Path) {
+#if defined(_WIN32) || defined(__APPLE__)
+  return Path.lower();
+#else
+  return std::string(Path);
+#endif
+}
+
+bool pathEqual(PathRef A, PathRef B) {
+#if defined(_WIN32) || defined(__APPLE__)
+  return A.equals_lower(B);
+#else
+  return A == B;
+#endif
+}
+
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/support/Path.h b/clang-tools-extra/clangd/support/Path.h
index 4d4ad7f49047..402903130f01 100644
--- a/clang-tools-extra/clangd/support/Path.h
+++ b/clang-tools-extra/clangd/support/Path.h
@@ -22,6 +22,12 @@ using Path = std::string;
 /// signatures.
 using PathRef = llvm::StringRef;
 
+// For platforms where paths are case-insensitive (but case-preserving),
+// we need to do case-insensitive comparisons and use lowercase keys.
+// FIXME: Make Path a real class with desired semantics instead.
+std::string maybeCaseFoldPath(PathRef Path);
+bool pathEqual(PathRef, PathRef);
+
 } // namespace clangd
 } // namespace clang
 
diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp
index 4bc03796bb2b..e25850a68fe9 100644
--- a/clang-tools-extra/clangd/unittests/RenameTests.cpp
+++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp
@@ -1067,6 +1067,52 @@ TEST(RenameTest, Renameable) {
   }
 }
 
+MATCHER_P(newText, T, "") { return arg.newText == T; }
+
+TEST(RenameTest, IndexMergeMainFile) {
+  Annotations Code("int ^x();");
+  TestTU TU = TestTU::withCode(Code.code());
+  TU.Filename = "main.cc";
+  auto AST = TU.build();
+
+  auto Main = testPath("main.cc");
+
+  auto Rename = [&](const SymbolIndex *Idx) {
+    auto GetDirtyBuffer = [&](PathRef Path) -> llvm::Optional<std::string> {
+      return Code.code().str(); // Every file has the same content.
+    };
+    RenameOptions Opts;
+    Opts.AllowCrossFile = true;
+    RenameInputs Inputs{Code.point(), "xPrime", AST,           Main,
+                        Idx,          Opts,     GetDirtyBuffer};
+    auto Results = rename(Inputs);
+    EXPECT_TRUE(bool(Results)) << llvm::toString(Results.takeError());
+    return std::move(*Results);
+  };
+
+  // We do not expect to see duplicated edits from AST vs index.
+  auto Results = Rename(TU.index().get());
+  EXPECT_THAT(Results.GlobalChanges.keys(), ElementsAre(Main));
+  EXPECT_THAT(Results.GlobalChanges[Main].asTextEdits(),
+              ElementsAre(newText("xPrime")));
+
+  // Sanity check: we do expect to see index results!
+  TU.Filename = "other.cc";
+  Results = Rename(TU.index().get());
+  EXPECT_THAT(Results.GlobalChanges.keys(),
+              UnorderedElementsAre(Main, testPath("other.cc")));
+
+#if defined(_WIN32) || defined(__APPLE__)
+  // On case-insensitive systems, no duplicates if AST vs index case differs.
+  // https://github.com/clangd/clangd/issues/665
+  TU.Filename = "MAIN.CC";
+  Results = Rename(TU.index().get());
+  EXPECT_THAT(Results.GlobalChanges.keys(), ElementsAre(Main));
+  EXPECT_THAT(Results.GlobalChanges[Main].asTextEdits(),
+              ElementsAre(newText("xPrime")));
+#endif
+}
+
 TEST(RenameTest, MainFileReferencesOnly) {
   // filter out references not from main file.
   llvm::StringRef Test =

From d8404633401509936600b60274b72fc03f11f040 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Mon, 15 Feb 2021 09:00:49 +0100
Subject: [PATCH 158/244] [clangd] Treat paths case-insensitively depending on
 the platform

Path{Match,Exclude} and MountPoint were checking paths case-sensitively
on all platforms. As with other features, this was causing problems on
Windows, since users can have capital drive letters in config files, but
editors might lower-case them.

This patch addresses that issue by:
- Creating regexes with case-insensitive matching on those platforms.
- Introducing a new pathStartsWith helper, which performs checks in a
  case-correct manner where needed.

Differential Revision: https://reviews.llvm.org/D96690

(cherry picked from commit ecea7218fb9b994b26471e9877851cdb51a5f1d4)
---
 clang-tools-extra/clangd/ConfigCompile.cpp    | 20 +++++++---
 clang-tools-extra/clangd/support/Path.cpp     | 37 ++++++++++++-------
 clang-tools-extra/clangd/support/Path.h       | 12 ++++++
 .../clangd/unittests/CMakeLists.txt           |  1 +
 .../clangd/unittests/ConfigCompileTests.cpp   | 36 ++++++++++++++++++
 .../clangd/unittests/RenameTests.cpp          |  2 +-
 .../clangd/unittests/support/PathTests.cpp    | 36 ++++++++++++++++++
 7 files changed, 124 insertions(+), 20 deletions(-)
 create mode 100644 clang-tools-extra/clangd/unittests/support/PathTests.cpp

diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp
index 8682cae36f26..dadc578c3a81 100644
--- a/clang-tools-extra/clangd/ConfigCompile.cpp
+++ b/clang-tools-extra/clangd/ConfigCompile.cpp
@@ -31,6 +31,7 @@
 #include "Features.inc"
 #include "TidyProvider.h"
 #include "support/Logger.h"
+#include "support/Path.h"
 #include "support/Trace.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
@@ -101,9 +102,11 @@ struct FragmentCompiler {
   // Normalized Fragment::SourceInfo::Directory.
   std::string FragmentDirectory;
 
-  llvm::Optional<llvm::Regex> compileRegex(const Located<std::string> &Text) {
+  llvm::Optional<llvm::Regex>
+  compileRegex(const Located<std::string> &Text,
+               llvm::Regex::RegexFlags Flags = llvm::Regex::NoFlags) {
     std::string Anchored = "^(" + *Text + ")$";
-    llvm::Regex Result(Anchored);
+    llvm::Regex Result(Anchored, Flags);
     std::string RegexError;
     if (!Result.isValid(RegexError)) {
       diag(Error, "Invalid regex " + Anchored + ": " + RegexError, Text.Range);
@@ -195,9 +198,15 @@ struct FragmentCompiler {
     if (F.HasUnrecognizedCondition)
       Out.Conditions.push_back([&](const Params &) { return false; });
 
+#ifdef CLANGD_PATH_CASE_INSENSITIVE
+    llvm::Regex::RegexFlags Flags = llvm::Regex::IgnoreCase;
+#else
+    llvm::Regex::RegexFlags Flags = llvm::Regex::NoFlags;
+#endif
+
     auto PathMatch = std::make_unique<std::vector<llvm::Regex>>();
     for (auto &Entry : F.PathMatch) {
-      if (auto RE = compileRegex(Entry))
+      if (auto RE = compileRegex(Entry, Flags))
         PathMatch->push_back(std::move(*RE));
     }
     if (!PathMatch->empty()) {
@@ -218,7 +227,7 @@ struct FragmentCompiler {
 
     auto PathExclude = std::make_unique<std::vector<llvm::Regex>>();
     for (auto &Entry : F.PathExclude) {
-      if (auto RE = compileRegex(Entry))
+      if (auto RE = compileRegex(Entry, Flags))
         PathExclude->push_back(std::move(*RE));
     }
     if (!PathExclude->empty()) {
@@ -349,7 +358,8 @@ struct FragmentCompiler {
       return;
     Spec.MountPoint = std::move(*AbsPath);
     Out.Apply.push_back([Spec(std::move(Spec))](const Params &P, Config &C) {
-      if (!P.Path.startswith(Spec.MountPoint))
+      if (P.Path.empty() || !pathStartsWith(Spec.MountPoint, P.Path,
+                                            llvm::sys::path::Style::posix))
         return;
       C.Index.External = Spec;
       // Disable background indexing for the files under the mountpoint.
diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp
index f72d00070f34..6fc74b92fc7a 100644
--- a/clang-tools-extra/clangd/support/Path.cpp
+++ b/clang-tools-extra/clangd/support/Path.cpp
@@ -7,24 +7,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "support/Path.h"
+#include "llvm/Support/Path.h"
 namespace clang {
 namespace clangd {
 
-std::string maybeCaseFoldPath(PathRef Path) {
-#if defined(_WIN32) || defined(__APPLE__)
-  return Path.lower();
-#else
-  return std::string(Path);
-#endif
-}
+#ifdef CLANGD_PATH_CASE_INSENSITIVE
+std::string maybeCaseFoldPath(PathRef Path) { return Path.lower(); }
+bool pathEqual(PathRef A, PathRef B) { return A.equals_lower(B); }
+#else  // NOT CLANGD_PATH_CASE_INSENSITIVE
+std::string maybeCaseFoldPath(PathRef Path) { return Path.str(); }
+bool pathEqual(PathRef A, PathRef B) { return A == B; }
+#endif // CLANGD_PATH_CASE_INSENSITIVE
 
-bool pathEqual(PathRef A, PathRef B) {
-#if defined(_WIN32) || defined(__APPLE__)
-  return A.equals_lower(B);
-#else
-  return A == B;
-#endif
+bool pathStartsWith(PathRef Ancestor, PathRef Path,
+                    llvm::sys::path::Style Style) {
+  assert(llvm::sys::path::is_absolute(Ancestor, Style) &&
+         llvm::sys::path::is_absolute(Path, Style));
+  // If ancestor ends with a separator drop that, so that we can match /foo/ as
+  // a parent of /foo.
+  if (llvm::sys::path::is_separator(Ancestor.back(), Style))
+    Ancestor = Ancestor.drop_back();
+  // Ensure Path starts with Ancestor.
+  if (!pathEqual(Ancestor, Path.take_front(Ancestor.size())))
+    return false;
+  Path = Path.drop_front(Ancestor.size());
+  // Then make sure either two paths are equal or Path has a separator
+  // afterwards.
+  return Path.empty() || llvm::sys::path::is_separator(Path.front(), Style);
 }
-
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/support/Path.h b/clang-tools-extra/clangd/support/Path.h
index 402903130f01..938d7d7e99c9 100644
--- a/clang-tools-extra/clangd/support/Path.h
+++ b/clang-tools-extra/clangd/support/Path.h
@@ -10,8 +10,14 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Path.h"
 #include <string>
 
+/// Whether current platform treats paths case insensitively.
+#if defined(_WIN32) || defined(__APPLE__)
+#define CLANGD_PATH_CASE_INSENSITIVE
+#endif
+
 namespace clang {
 namespace clangd {
 
@@ -28,6 +34,12 @@ using PathRef = llvm::StringRef;
 std::string maybeCaseFoldPath(PathRef Path);
 bool pathEqual(PathRef, PathRef);
 
+/// Checks if \p Ancestor is equal to or an ancestor of \p Path. This is a
+/// smarter lexical prefix match, e.g: foo/bar/baz doesn't start with foo/./bar.
+/// Both \p Ancestor and \p Path must be absolute.
+bool pathStartsWith(
+    PathRef Ancestor, PathRef Path,
+    llvm::sys::path::Style Style = llvm::sys::path::Style::native);
 } // namespace clangd
 } // namespace clang
 
diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
index f4d364720eaf..c396c6f5873b 100644
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -104,6 +104,7 @@ add_unittest(ClangdUnitTests ClangdTests
   support/FunctionTests.cpp
   support/MarkupTests.cpp
   support/MemoryTreeTests.cpp
+  support/PathTests.cpp
   support/ThreadingTests.cpp
   support/TestTracer.cpp
   support/TraceTests.cpp
diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
index 4b1da2035727..d9aa171f3102 100644
--- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
@@ -99,6 +99,25 @@ TEST_F(ConfigCompileTests, Condition) {
   Frag.If.PathMatch.emplace_back("ba*r");
   EXPECT_FALSE(compileAndApply());
   EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+
+  // Only matches case-insensitively.
+  Frag = {};
+  Frag.If.PathMatch.emplace_back("B.*R");
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+#ifdef CLANGD_PATH_CASE_INSENSITIVE
+  EXPECT_TRUE(compileAndApply());
+#else
+  EXPECT_FALSE(compileAndApply());
+#endif
+
+  Frag = {};
+  Frag.If.PathExclude.emplace_back("B.*R");
+  EXPECT_THAT(Diags.Diagnostics, IsEmpty());
+#ifdef CLANGD_PATH_CASE_INSENSITIVE
+  EXPECT_FALSE(compileAndApply());
+#else
+  EXPECT_TRUE(compileAndApply());
+#endif
 }
 
 TEST_F(ConfigCompileTests, CompileCommands) {
@@ -406,6 +425,23 @@ TEST_F(ConfigCompileTests, ExternalBlockMountPoint) {
   ASSERT_THAT(Diags.Diagnostics, IsEmpty());
   ASSERT_TRUE(Conf.Index.External);
   EXPECT_THAT(Conf.Index.External->MountPoint, FooPath);
+
+  // Only matches case-insensitively.
+  BazPath = testPath("fOo/baz.h", llvm::sys::path::Style::posix);
+  BazPath = llvm::sys::path::convert_to_slash(BazPath);
+  Parm.Path = BazPath;
+
+  FooPath = testPath("FOO/", llvm::sys::path::Style::posix);
+  FooPath = llvm::sys::path::convert_to_slash(FooPath);
+  Frag = GetFrag("", FooPath.c_str());
+  compileAndApply();
+  ASSERT_THAT(Diags.Diagnostics, IsEmpty());
+#ifdef CLANGD_PATH_CASE_INSENSITIVE
+  ASSERT_TRUE(Conf.Index.External);
+  EXPECT_THAT(Conf.Index.External->MountPoint, FooPath);
+#else
+  ASSERT_FALSE(Conf.Index.External);
+#endif
 }
 } // namespace
 } // namespace config
diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp
index e25850a68fe9..b2c83a1a4303 100644
--- a/clang-tools-extra/clangd/unittests/RenameTests.cpp
+++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp
@@ -1102,7 +1102,7 @@ TEST(RenameTest, IndexMergeMainFile) {
   EXPECT_THAT(Results.GlobalChanges.keys(),
               UnorderedElementsAre(Main, testPath("other.cc")));
 
-#if defined(_WIN32) || defined(__APPLE__)
+#ifdef CLANGD_PATH_CASE_INSENSITIVE
   // On case-insensitive systems, no duplicates if AST vs index case differs.
   // https://github.com/clangd/clangd/issues/665
   TU.Filename = "MAIN.CC";
diff --git a/clang-tools-extra/clangd/unittests/support/PathTests.cpp b/clang-tools-extra/clangd/unittests/support/PathTests.cpp
new file mode 100644
index 000000000000..26b999d103a0
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/support/PathTests.cpp
@@ -0,0 +1,36 @@
+//===-- PathTests.cpp -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/Path.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+TEST(PathTests, IsAncestor) {
+  EXPECT_TRUE(pathStartsWith("/foo", "/foo"));
+  EXPECT_TRUE(pathStartsWith("/foo/", "/foo"));
+
+  EXPECT_FALSE(pathStartsWith("/foo", "/fooz"));
+  EXPECT_FALSE(pathStartsWith("/foo/", "/fooz"));
+
+  EXPECT_TRUE(pathStartsWith("/foo", "/foo/bar"));
+  EXPECT_TRUE(pathStartsWith("/foo/", "/foo/bar"));
+
+#ifdef CLANGD_PATH_CASE_INSENSITIVE
+  EXPECT_TRUE(pathStartsWith("/fOo", "/foo/bar"));
+  EXPECT_TRUE(pathStartsWith("/foo", "/fOo/bar"));
+#else
+  EXPECT_FALSE(pathStartsWith("/fOo", "/foo/bar"));
+  EXPECT_FALSE(pathStartsWith("/foo", "/fOo/bar"));
+#endif
+}
+} // namespace
+} // namespace clangd
+} // namespace clang

From b60110090a942078bbacf71db166c2353c340413 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Tue, 16 Feb 2021 20:57:00 +0100
Subject: [PATCH 159/244] [clangd] Fix windows buildbots after
 ecea7218fb9b994b26471e9877851cdb51a5f1d4

(cherry picked from commit cdef5a7161767c2c4b3b7cb2542cf1d29b6d4a09)
---
 clang-tools-extra/clangd/support/Path.cpp     |  4 ++--
 .../clangd/unittests/support/PathTests.cpp    | 21 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp
index 6fc74b92fc7a..a7907cffe60c 100644
--- a/clang-tools-extra/clangd/support/Path.cpp
+++ b/clang-tools-extra/clangd/support/Path.cpp
@@ -21,8 +21,8 @@ bool pathEqual(PathRef A, PathRef B) { return A == B; }
 
 bool pathStartsWith(PathRef Ancestor, PathRef Path,
                     llvm::sys::path::Style Style) {
-  assert(llvm::sys::path::is_absolute(Ancestor, Style) &&
-         llvm::sys::path::is_absolute(Path, Style));
+  assert(llvm::sys::path::is_absolute(Ancestor) &&
+         llvm::sys::path::is_absolute(Path));
   // If ancestor ends with a separator drop that, so that we can match /foo/ as
   // a parent of /foo.
   if (llvm::sys::path::is_separator(Ancestor.back(), Style))
diff --git a/clang-tools-extra/clangd/unittests/support/PathTests.cpp b/clang-tools-extra/clangd/unittests/support/PathTests.cpp
index 26b999d103a0..599c76926d30 100644
--- a/clang-tools-extra/clangd/unittests/support/PathTests.cpp
+++ b/clang-tools-extra/clangd/unittests/support/PathTests.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "TestFS.h"
 #include "support/Path.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -14,21 +15,21 @@ namespace clang {
 namespace clangd {
 namespace {
 TEST(PathTests, IsAncestor) {
-  EXPECT_TRUE(pathStartsWith("/foo", "/foo"));
-  EXPECT_TRUE(pathStartsWith("/foo/", "/foo"));
+  EXPECT_TRUE(pathStartsWith(testPath("foo"), testPath("foo")));
+  EXPECT_TRUE(pathStartsWith(testPath("foo/"), testPath("foo")));
 
-  EXPECT_FALSE(pathStartsWith("/foo", "/fooz"));
-  EXPECT_FALSE(pathStartsWith("/foo/", "/fooz"));
+  EXPECT_FALSE(pathStartsWith(testPath("foo"), testPath("fooz")));
+  EXPECT_FALSE(pathStartsWith(testPath("foo/"), testPath("fooz")));
 
-  EXPECT_TRUE(pathStartsWith("/foo", "/foo/bar"));
-  EXPECT_TRUE(pathStartsWith("/foo/", "/foo/bar"));
+  EXPECT_TRUE(pathStartsWith(testPath("foo"), testPath("foo/bar")));
+  EXPECT_TRUE(pathStartsWith(testPath("foo/"), testPath("foo/bar")));
 
 #ifdef CLANGD_PATH_CASE_INSENSITIVE
-  EXPECT_TRUE(pathStartsWith("/fOo", "/foo/bar"));
-  EXPECT_TRUE(pathStartsWith("/foo", "/fOo/bar"));
+  EXPECT_TRUE(pathStartsWith(testPath("fOo"), testPath("foo/bar")));
+  EXPECT_TRUE(pathStartsWith(testPath("foo"), testPath("fOo/bar")));
 #else
-  EXPECT_FALSE(pathStartsWith("/fOo", "/foo/bar"));
-  EXPECT_FALSE(pathStartsWith("/foo", "/fOo/bar"));
+  EXPECT_FALSE(pathStartsWith(testPath("fOo"), testPath("foo/bar")));
+  EXPECT_FALSE(pathStartsWith(testPath("foo"), testPath("fOo/bar")));
 #endif
 }
 } // namespace

From 67d6fbe0f157ba78e8131964d60155dc1090f409 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Mon, 22 Feb 2021 22:05:26 +0100
Subject: [PATCH 160/244] [clangd] Release notes for 12.x

---
 clang-tools-extra/docs/ReleaseNotes.rst | 169 ++++++++++++++++++++++++
 1 file changed, 169 insertions(+)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 2960aad5a556..64b3d224ff6f 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -47,6 +47,9 @@ Major New Features
 Improvements to clangd
 ----------------------
 
+Performance
+^^^^^^^^^^^
+
 - clangd's memory usage is significantly reduced on most Linux systems.
   In particular, memory usage should not increase dramatically over time.
 
@@ -59,6 +62,172 @@ Improvements to clangd
   systems can disable this using ``--malloc_trim=0`` or the CMake flag
   ``-DCLANGD_MALLOC_TRIM=0``.
 
+- Added the `$/memoryUsage request
+  <https://clangd.llvm.org/extensions.html#memory-usage>`_: an LSP extension.
+  This provides a breakdown of the memory clangd thinks it is using (excluding
+  malloc overhead etc). The clangd VSCode extension supports showing the memory
+  usage tree.
+
+Parsing and selection
+^^^^^^^^^^^^^^^^^^^^^
+
+- Improved navigation of broken code in C using Recovery AST. (This has been
+  enabled for C++ since clangd 11).
+
+- Types are understood more often in broken code. (This is the first release
+  where Recovery AST preserves speculated types).
+
+- Heuristic resolution for dependent names in templates.
+
+Code completion
+^^^^^^^^^^^^^^^
+
+- Higher priority for symbols that were already used in this file, and symbols
+  from namespaces mentioned in this file. (Estimated 3% accuracy improvement)
+
+- Introduced a ranking algorithm trained on snippets from a large C++ codebase.
+  Use the flag ``--ranking-model=decision_forest`` to try this (Estimated 6%
+  accuracy improvement). This mode is likely to become the default in future.
+
+  Note: this is a generic model, not specialized for your code. clangd does not
+  collect any data from your code to train code completion.
+
+- Signature help works with functions with template-dependent parameter types.
+
+Go to definition
+^^^^^^^^^^^^^^^^
+
+- Selecting an ``auto`` or ``decltype`` keyword will attempt to navigate to
+  a definition of the deduced type.
+
+- Improved handling of aliases: navigate to the underlying entity more often.
+
+- Better understanding of declaration vs definition for Objective-C classes and
+  protocols.
+
+- Selecting a pure-virtual method shows its overrides.
+
+Find references
+^^^^^^^^^^^^^^^
+
+- Indexes are smarter about not returning stale references when code is deleted.
+
+- References in implementation files are always indexed, so results should be
+  more complete.
+
+- Find-references on a virtual method shows references to overridden methods.
+
+New navigation features
+^^^^^^^^^^^^^^^^^^^^^^^
+
+- Call hierarchy (``textDocument/callHierarchy``) is supported.
+  Only incoming calls are available.
+
+- Go to implementation (``textDocument/implementation``) is supported on
+  abstract classes, and on virtual methods.
+
+- Symbol search (``workspace/symbol``) queries may be partially qualified.
+  That is, typing ``b::Foo`` will match the symbol ``a::b::c::Foo``.
+
+Refactoring
+^^^^^^^^^^^
+
+- New refactoring: populate ``switch`` statement with cases.
+  (This acts as a fix for the ``-Wswitch-enum`` warning).
+
+- Renaming templates is supported, and many other complex cases were fixed.
+
+- Attempting to rename to an invalid or conflicting name can produce an error
+  message rather than broken code. (Not all cases are detected!)
+
+- The accuracy of many code actions has been improved.
+
+Hover
+^^^^^
+
+- Hovers for ``auto`` and ``decltype`` show the type in the same style as other
+  hovers. ``this`` is also now supported.
+
+- Displayed type names are more consistent and idiomatic.
+
+Semantic highlighting
+^^^^^^^^^^^^^^^^^^^^^
+
+- Inactive preprocessor regions (``#ifdef``) are highlighted as comments.
+
+- clangd 12 is the last release with support for the non-standard
+  ``textDocument/semanticHighlights`` notification. Clients should migrate to
+  the ``textDocument/semanticTokens`` request added in LSP 3.16.
+
+Remote index (alpha)
+^^^^^^^^^^^^^^^^^^^^
+
+- clangd can now connect to a remote index server instead of building a project
+  index locally. This saves resources in large codebases that are slow to index.
+
+- The server program is ``clangd-index-server``, and it consumes index files
+  produced by ``clangd-indexer``.
+
+- This feature requires clangd to be built with the CMake flag
+  ``-DCLANGD_ENABLE_REMOTE=On``, which requires GRPC libraries and is not
+  enabled by default. Unofficial releases of the remote-index-enabled client
+  and server tools are at https://github.com/clangd/clangd/releases
+
+- Large projects can deploy a shared server, and check in a ``.clangd`` file
+  to enable it (in the ``Index.External`` section). We hope to provide such a
+  server for ``llvm-project`` itself in the near future.
+
+Configuration
+^^^^^^^^^^^^^
+
+- Static and remote indexes can be configured in the ``Index.External`` section.
+  Different static indexes can now be used for different files.
+  (Obsoletes the flag ``--index-file``).
+
+- Diagnostics can be filtered or suppressed in the ``Diagnostics`` section.
+
+- Clang-tidy checks can be enabled/disabled in the ``Diagnostics.ClangTidy``
+  section. (Obsoletes the flag ``--clang-tidy-checks``).
+
+- The compilation database directory can be configured in the ``CompileFlags``
+  section. Different compilation databases can now be specified for different
+  files. (Obsoletes the flag ``--compile-commands-dir``).
+
+- Errors in loaded configuration files are published as LSP diagnostics, and so
+  should be shown in your editor.
+
+`Full reference of configuration options <https://clangd.llvm.org/config.html>`_
+
+System integration
+^^^^^^^^^^^^^^^^^^
+
+- Changes to ``compile_commands.json`` and ``compile_flags.txt`` will take
+  effect the next time a file is parsed, without restarting clangd.
+
+- ``clangd --check=<filename>`` can be run on the command-line to simulate
+  opening a file without actually using an editor. This can be useful to
+  reproduce crashes or other problems.
+
+- Various fixes to handle filenames correctly (and case-insensitively) on
+  windows.
+
+- If incoming LSP messages are malformed, the logs now contain details.
+
+Miscellaneous
+^^^^^^^^^^^^^
+
+- "Show AST" request
+  (`textDocument/ast <https://clangd.llvm.org/extensions.html#ast>`_)
+  added as an LSP extension. This displays a simplified view of the clang AST
+  for selected code. The clangd VSCode extension supports this.
+
+- clangd should no longer crash while loading old or corrupt index files.
+
+- The flags ``--index``, ``--recovery-ast`` and ``--suggest-missing-includes``
+  have been retired. These features are now always enabled.
+
+- Too many stability and correctness fixes to mention.
+
 Improvements to clang-doc
 -------------------------
 

From a750a2329c433e598f7fc9655d625c5ebb6bc400 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 22 Feb 2021 16:27:19 -0800
Subject: [PATCH 161/244] clang-tidy: Disable
 cppcoreguidelines-prefer-member-initializer check

Fixes https://llvm.org/PR49318
---
 .../cppcoreguidelines/CMakeLists.txt          |   1 -
 .../CppCoreGuidelinesTidyModule.cpp           |   3 -
 .../PreferMemberInitializerCheck.cpp          | 246 ---------
 .../PreferMemberInitializerCheck.h            |  41 --
 clang-tools-extra/docs/ReleaseNotes.rst       |   6 -
 ...reguidelines-prefer-member-initializer.rst | 103 ----
 ...ize-use-default-member-init-assignment.cpp |  31 --
 ...izer-modernize-use-default-member-init.cpp |  30 --
 ...reguidelines-prefer-member-initializer.cpp | 490 ------------------
 9 files changed, 951 deletions(-)
 delete mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp
 delete mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h
 delete mode 100644 clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst
 delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp

diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt
index a9f5b3e0c15b..39c2c552eb73 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt
@@ -13,7 +13,6 @@ add_clang_library(clangTidyCppCoreGuidelinesModule
   NarrowingConversionsCheck.cpp
   NoMallocCheck.cpp
   OwningMemoryCheck.cpp
-  PreferMemberInitializerCheck.cpp
   ProBoundsArrayToPointerDecayCheck.cpp
   ProBoundsConstantArrayIndexCheck.cpp
   ProBoundsPointerArithmeticCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp
index bf613109f0eb..4cb5022888d3 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp
@@ -22,7 +22,6 @@
 #include "NarrowingConversionsCheck.h"
 #include "NoMallocCheck.h"
 #include "OwningMemoryCheck.h"
-#include "PreferMemberInitializerCheck.h"
 #include "ProBoundsArrayToPointerDecayCheck.h"
 #include "ProBoundsConstantArrayIndexCheck.h"
 #include "ProBoundsPointerArithmeticCheck.h"
@@ -67,8 +66,6 @@ class CppCoreGuidelinesModule : public ClangTidyModule {
         "cppcoreguidelines-non-private-member-variables-in-classes");
     CheckFactories.registerCheck<OwningMemoryCheck>(
         "cppcoreguidelines-owning-memory");
-    CheckFactories.registerCheck<PreferMemberInitializerCheck>(
-        "cppcoreguidelines-prefer-member-initializer");
     CheckFactories.registerCheck<ProBoundsArrayToPointerDecayCheck>(
         "cppcoreguidelines-pro-bounds-array-to-pointer-decay");
     CheckFactories.registerCheck<ProBoundsConstantArrayIndexCheck>(
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp
deleted file mode 100644
index 2d7500943860..000000000000
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-//===--- PreferMemberInitializerCheck.cpp - clang-tidy -------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "PreferMemberInitializerCheck.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/Lex/Lexer.h"
-
-using namespace clang::ast_matchers;
-
-namespace clang {
-namespace tidy {
-namespace cppcoreguidelines {
-
-static bool isControlStatement(const Stmt *S) {
-  return isa<IfStmt, SwitchStmt, ForStmt, WhileStmt, DoStmt, ReturnStmt,
-             GotoStmt, CXXTryStmt, CXXThrowExpr>(S);
-}
-
-static bool isNoReturnCallStatement(const Stmt *S) {
-  const auto *Call = dyn_cast<CallExpr>(S);
-  if (!Call)
-    return false;
-
-  const FunctionDecl *Func = Call->getDirectCallee();
-  if (!Func)
-    return false;
-
-  return Func->isNoReturn();
-}
-
-static bool isLiteral(const Expr *E) {
-  return isa<StringLiteral, CharacterLiteral, IntegerLiteral, FloatingLiteral,
-             CXXBoolLiteralExpr, CXXNullPtrLiteralExpr>(E);
-}
-
-static bool isUnaryExprOfLiteral(const Expr *E) {
-  if (const auto *UnOp = dyn_cast<UnaryOperator>(E))
-    return isLiteral(UnOp->getSubExpr());
-  return false;
-}
-
-static bool shouldBeDefaultMemberInitializer(const Expr *Value) {
-  if (isLiteral(Value) || isUnaryExprOfLiteral(Value))
-    return true;
-
-  if (const auto *DRE = dyn_cast<DeclRefExpr>(Value))
-    return isa<EnumConstantDecl>(DRE->getDecl());
-
-  return false;
-}
-
-static const std::pair<const FieldDecl *, const Expr *>
-isAssignmentToMemberOf(const RecordDecl *Rec, const Stmt *S) {
-  if (const auto *BO = dyn_cast<BinaryOperator>(S)) {
-    if (BO->getOpcode() != BO_Assign)
-      return std::make_pair(nullptr, nullptr);
-
-    const auto *ME = dyn_cast<MemberExpr>(BO->getLHS()->IgnoreParenImpCasts());
-    if (!ME)
-      return std::make_pair(nullptr, nullptr);
-
-    const auto *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
-    if (!Field)
-      return std::make_pair(nullptr, nullptr);
-
-    if (isa<CXXThisExpr>(ME->getBase()))
-      return std::make_pair(Field, BO->getRHS()->IgnoreParenImpCasts());
-  } else if (const auto *COCE = dyn_cast<CXXOperatorCallExpr>(S)) {
-    if (COCE->getOperator() != OO_Equal)
-      return std::make_pair(nullptr, nullptr);
-
-    const auto *ME =
-        dyn_cast<MemberExpr>(COCE->getArg(0)->IgnoreParenImpCasts());
-    if (!ME)
-      return std::make_pair(nullptr, nullptr);
-
-    const auto *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
-    if (!Field)
-      return std::make_pair(nullptr, nullptr);
-
-    if (isa<CXXThisExpr>(ME->getBase()))
-      return std::make_pair(Field, COCE->getArg(1)->IgnoreParenImpCasts());
-  }
-
-  return std::make_pair(nullptr, nullptr);
-}
-
-PreferMemberInitializerCheck::PreferMemberInitializerCheck(
-    StringRef Name, ClangTidyContext *Context)
-    : ClangTidyCheck(Name, Context),
-      IsUseDefaultMemberInitEnabled(
-          Context->isCheckEnabled("modernize-use-default-member-init")),
-      UseAssignment(OptionsView("modernize-use-default-member-init",
-                                Context->getOptions().CheckOptions, Context)
-                        .get("UseAssignment", false)) {}
-
-void PreferMemberInitializerCheck::storeOptions(
-    ClangTidyOptions::OptionMap &Opts) {
-  Options.store(Opts, "UseAssignment", UseAssignment);
-}
-
-void PreferMemberInitializerCheck::registerMatchers(MatchFinder *Finder) {
-  Finder->addMatcher(
-      cxxConstructorDecl(hasBody(compoundStmt()), unless(isInstantiated()))
-          .bind("ctor"),
-      this);
-}
-
-void PreferMemberInitializerCheck::check(
-    const MatchFinder::MatchResult &Result) {
-  const auto *Ctor = Result.Nodes.getNodeAs<CXXConstructorDecl>("ctor");
-  const auto *Body = cast<CompoundStmt>(Ctor->getBody());
-
-  const CXXRecordDecl *Class = Ctor->getParent();
-  SourceLocation InsertPos;
-  bool FirstToCtorInits = true;
-
-  for (const Stmt *S : Body->body()) {
-    if (S->getBeginLoc().isMacroID()) {
-      StringRef MacroName =
-        Lexer::getImmediateMacroName(S->getBeginLoc(), *Result.SourceManager,
-                                     getLangOpts());
-      if (MacroName.contains_lower("assert"))
-        return;
-    }
-    if (isControlStatement(S))
-      return;
-
-    if (isNoReturnCallStatement(S))
-      return;
-
-    if (const auto *CondOp = dyn_cast<ConditionalOperator>(S)) {
-      if (isNoReturnCallStatement(CondOp->getLHS()) ||
-          isNoReturnCallStatement(CondOp->getRHS()))
-        return;
-    }
-
-    const FieldDecl *Field;
-    const Expr *InitValue;
-    std::tie(Field, InitValue) = isAssignmentToMemberOf(Class, S);
-    if (Field) {
-      if (IsUseDefaultMemberInitEnabled && getLangOpts().CPlusPlus11 &&
-          Ctor->isDefaultConstructor() &&
-          (getLangOpts().CPlusPlus20 || !Field->isBitField()) &&
-          (!isa<RecordDecl>(Class->getDeclContext()) ||
-           !cast<RecordDecl>(Class->getDeclContext())->isUnion()) &&
-          shouldBeDefaultMemberInitializer(InitValue)) {
-        auto Diag =
-            diag(S->getBeginLoc(), "%0 should be initialized in an in-class"
-                                   " default member initializer")
-            << Field;
-
-        SourceLocation FieldEnd =
-            Lexer::getLocForEndOfToken(Field->getSourceRange().getEnd(), 0,
-                                       *Result.SourceManager, getLangOpts());
-        Diag << FixItHint::CreateInsertion(FieldEnd,
-                                           UseAssignment ? " = " : "{")
-             << FixItHint::CreateInsertionFromRange(
-                    FieldEnd,
-                    CharSourceRange(InitValue->getSourceRange(), true))
-             << FixItHint::CreateInsertion(FieldEnd, UseAssignment ? "" : "}");
-
-        SourceLocation SemiColonEnd =
-            Lexer::findNextToken(S->getEndLoc(), *Result.SourceManager,
-                                 getLangOpts())
-                ->getEndLoc();
-        CharSourceRange StmtRange =
-            CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd);
-
-        Diag << FixItHint::CreateRemoval(StmtRange);
-      } else {
-        auto Diag =
-            diag(S->getBeginLoc(), "%0 should be initialized in a member"
-                                   " initializer of the constructor")
-            << Field;
-
-        bool AddComma = false;
-        if (!Ctor->getNumCtorInitializers() && FirstToCtorInits) {
-          SourceLocation BodyPos = Ctor->getBody()->getBeginLoc();
-          SourceLocation NextPos = Ctor->getBeginLoc();
-          do {
-            InsertPos = NextPos;
-            NextPos = Lexer::findNextToken(NextPos, *Result.SourceManager,
-                                           getLangOpts())
-                          ->getLocation();
-          } while (NextPos != BodyPos);
-          InsertPos = Lexer::getLocForEndOfToken(
-              InsertPos, 0, *Result.SourceManager, getLangOpts());
-
-          Diag << FixItHint::CreateInsertion(InsertPos, " : ");
-        } else {
-          bool Found = false;
-          for (const auto *Init : Ctor->inits()) {
-            if (Init->isMemberInitializer()) {
-              if (Result.SourceManager->isBeforeInTranslationUnit(
-                      Field->getLocation(), Init->getMember()->getLocation())) {
-                InsertPos = Init->getSourceLocation();
-                Found = true;
-                break;
-              }
-            }
-          }
-
-          if (!Found) {
-            if (Ctor->getNumCtorInitializers()) {
-              InsertPos = Lexer::getLocForEndOfToken(
-                  (*Ctor->init_rbegin())->getSourceRange().getEnd(), 0,
-                  *Result.SourceManager, getLangOpts());
-            }
-            Diag << FixItHint::CreateInsertion(InsertPos, ", ");
-          } else {
-            AddComma = true;
-          }
-        }
-        Diag << FixItHint::CreateInsertion(InsertPos, Field->getName())
-             << FixItHint::CreateInsertion(InsertPos, "(")
-             << FixItHint::CreateInsertionFromRange(
-                    InsertPos,
-                    CharSourceRange(InitValue->getSourceRange(), true))
-             << FixItHint::CreateInsertion(InsertPos, ")");
-        if (AddComma)
-          Diag << FixItHint::CreateInsertion(InsertPos, ", ");
-
-        SourceLocation SemiColonEnd =
-            Lexer::findNextToken(S->getEndLoc(), *Result.SourceManager,
-                                 getLangOpts())
-                ->getEndLoc();
-        CharSourceRange StmtRange =
-            CharSourceRange::getCharRange(S->getBeginLoc(), SemiColonEnd);
-
-        Diag << FixItHint::CreateRemoval(StmtRange);
-        FirstToCtorInits = false;
-      }
-    }
-  }
-}
-
-} // namespace cppcoreguidelines
-} // namespace tidy
-} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h
deleted file mode 100644
index dbef7c98d8e3..000000000000
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===--- PreferMemberInitializerCheck.h - clang-tidy ------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H
-
-#include "../ClangTidyCheck.h"
-
-namespace clang {
-namespace tidy {
-namespace cppcoreguidelines {
-
-/// Finds member initializations in the constructor body which can be placed
-/// into the initialization list instead.
-///
-/// For the user-facing documentation see:
-/// http://clang.llvm.org/extra/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.html
-class PreferMemberInitializerCheck : public ClangTidyCheck {
-public:
-  PreferMemberInitializerCheck(StringRef Name, ClangTidyContext *Context);
-  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
-    return LangOpts.CPlusPlus;
-  }
-  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
-  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
-  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
-
-  const bool IsUseDefaultMemberInitEnabled;
-  const bool UseAssignment;
-};
-
-} // namespace cppcoreguidelines
-} // namespace tidy
-} // namespace clang
-
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_PREFERMEMBERINITIALIZERCHECK_H
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 64b3d224ff6f..b3c9c829198b 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -290,12 +290,6 @@ New checks
   Finds structs that are inefficiently packed or aligned, and recommends
   packing and/or aligning of said structs as needed.
 
-- New :doc:`cppcoreguidelines-prefer-member-initializer
-  <clang-tidy/checks/cppcoreguidelines-prefer-member-initializer>` check.
-
-  Finds member initializations in the constructor body which can be placed into
-  the initialization list instead.
-
 - New :doc:`bugprone-misplaced-pointer-arithmetic-in-alloc
   <clang-tidy/checks/bugprone-misplaced-pointer-arithmetic-in-alloc>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst
deleted file mode 100644
index 5a5ee3e57a8c..000000000000
--- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-prefer-member-initializer.rst
+++ /dev/null
@@ -1,103 +0,0 @@
-.. title:: clang-tidy - cppcoreguidelines-prefer-member-initializer
-
-cppcoreguidelines-prefer-member-initializer
-===========================================
-
-Finds member initializations in the constructor body which can be  converted
-into member initializers of the constructor instead. This not only improves
-the readability of the code but also positively affects its performance.
-Class-member assignments inside a control statement or following the first
-control statement are ignored.
-
-This check implements `C.49 <https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#c49-prefer-initialization-to-assignment-in-constructors>`_ from the CppCoreGuidelines.
-
-If the language version is `C++ 11` or above, the constructor is the default
-constructor of the class, the field is not a bitfield (only in case of earlier
-language version than `C++ 20`), furthermore the assigned value is a literal,
-negated literal or ``enum`` constant then the preferred place of the
-initialization is at the class member declaration.
-
-This latter rule is `C.48 <https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#c48-prefer-in-class-initializers-to-member-initializers-in-constructors-for-constant-initializers>`_ from CppCoreGuidelines.
-
-Please note, that this check does not enforce this latter rule for
-initializations already implemented as member initializers. For that purpose
-see check `modernize-use-default-member-init <modernize-use-default-member-init.html>`_.
-
-Example 1
----------
-
-.. code-block:: c++
-
-  class C {
-    int n;
-    int m;
-  public:
-    C() {
-      n = 1; // Literal in default constructor
-      if (dice())
-        return;
-      m = 1;
-    }
-  };
-
-Here ``n`` can be initialized using a default member initializer, unlike
-``m``, as ``m``'s initialization follows a control statement (``if``):
-
-.. code-block:: c++
-
-  class C {
-    int n{1};
-    int m;
-  public:
-    C() {
-      if (dice())
-        return;
-      m = 1;
-    }
-
-Example 2
----------
-
-.. code-block:: c++
-
-  class C {
-    int n;
-    int m;
-  public:
-    C(int nn, int mm) {
-      n = nn; // Neither default constructor nor literal
-      if (dice())
-        return;
-      m = mm;
-    }
-  };
-
-Here ``n`` can be initialized in the constructor initialization list, unlike
-``m``, as ``m``'s initialization follows a control statement (``if``):
-
-.. code-block:: c++
-
-  C(int nn, int mm) : n(nn) {
-    if (dice())
-      return;
-    m = mm;
-  }
-
-.. option:: UseAssignment
-
-   If this option is set to `true` (default is `false`), the check will initialize
-   members with an assignment. In this case the fix of the first example looks
-   like this:
-
-.. code-block:: c++
-
-  class C {
-    int n = 1;
-    int m;
-  public:
-    C() {
-      if (dice())
-        return;
-      m = 1;
-    }
-  };
diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp
deleted file mode 100644
index dc6cb7606a0d..000000000000
--- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init-assignment.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer,modernize-use-default-member-init %t -- \
-// RUN: -config="{CheckOptions: [{key: modernize-use-default-member-init.UseAssignment, value: 1}]}"
-
-class Simple1 {
-  int n;
-  // CHECK-FIXES: int n = 0;
-  double x;
-  // CHECK-FIXES: double x = 0.0;
-
-public:
-  Simple1() {
-    n = 0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x = 0.0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  Simple1(int nn, double xx) {
-    // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) {
-    n = nn;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x = xx;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Simple1() = default;
-};
diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp
deleted file mode 100644
index fe5bb7c3bb98..000000000000
--- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer-modernize-use-default-member-init.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer,modernize-use-default-member-init %t
-
-class Simple1 {
-  int n;
-  // CHECK-FIXES: int n{0};
-  double x;
-  // CHECK-FIXES: double x{0.0};
-
-public:
-  Simple1() {
-    n = 0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x = 0.0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in an in-class default member initializer [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  Simple1(int nn, double xx) {
-    // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) {
-    n = nn;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x = xx;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Simple1() = default;
-};
diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp
deleted file mode 100644
index b5c04c32c9fa..000000000000
--- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-prefer-member-initializer.cpp
+++ /dev/null
@@ -1,490 +0,0 @@
-// RUN: %check_clang_tidy %s cppcoreguidelines-prefer-member-initializer %t -- -- -fcxx-exceptions
-
-extern void __assert_fail (__const char *__assertion, __const char *__file,
-    unsigned int __line, __const char *__function)
-     __attribute__ ((__noreturn__));
-#define assert(expr) \
-  ((expr)  ? (void)(0)  : __assert_fail (#expr, __FILE__, __LINE__, __func__))
-
-class Simple1 {
-  int n;
-  double x;
-
-public:
-  Simple1() {
-    // CHECK-FIXES: Simple1() : n(0), x(0.0) {
-    n = 0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x = 0.0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  Simple1(int nn, double xx) {
-    // CHECK-FIXES: Simple1(int nn, double xx) : n(nn), x(xx) {
-    n = nn;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x = xx;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Simple1() = default;
-};
-
-class Simple2 {
-  int n;
-  double x;
-
-public:
-  Simple2() : n(0) {
-    // CHECK-FIXES: Simple2() : n(0), x(0.0) {
-    x = 0.0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  Simple2(int nn, double xx) : n(nn) {
-    // CHECK-FIXES: Simple2(int nn, double xx) : n(nn), x(xx) {
-    x = xx;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Simple2() = default;
-};
-
-class Simple3 {
-  int n;
-  double x;
-
-public:
-  Simple3() : x(0.0) {
-    // CHECK-FIXES: Simple3() : n(0), x(0.0) {
-    n = 0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  Simple3(int nn, double xx) : x(xx) {
-    // CHECK-FIXES: Simple3(int nn, double xx) : n(nn), x(xx) {
-    n = nn;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Simple3() = default;
-};
-
-int something_int();
-double something_double();
-
-class Simple4 {
-  int n;
-
-public:
-  Simple4() {
-    // CHECK-FIXES: Simple4() : n(something_int()) {
-    n = something_int();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Simple4() = default;
-};
-
-static bool dice();
-
-class Complex1 {
-  int n;
-  int m;
-
-public:
-  Complex1() : n(0) {
-    if (dice())
-      m = 1;
-    // NO-MESSAGES: initialization of 'm' is nested in a conditional expression
-  }
-
-  ~Complex1() = default;
-};
-
-class Complex2 {
-  int n;
-  int m;
-
-public:
-  Complex2() : n(0) {
-    if (!dice())
-      return;
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a conditional expression
-  }
-
-  ~Complex2() = default;
-};
-
-class Complex3 {
-  int n;
-  int m;
-
-public:
-  Complex3() : n(0) {
-    while (dice())
-      m = 1;
-    // NO-MESSAGES: initialization of 'm' is nested in a conditional loop
-  }
-
-  ~Complex3() = default;
-};
-
-class Complex4 {
-  int n;
-  int m;
-
-public:
-  Complex4() : n(0) {
-    while (!dice())
-      return;
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a conditional loop
-  }
-
-  ~Complex4() = default;
-};
-
-class Complex5 {
-  int n;
-  int m;
-
-public:
-  Complex5() : n(0) {
-    do {
-      m = 1;
-      // NO-MESSAGES: initialization of 'm' is nested in a conditional loop
-    } while (dice());
-  }
-
-  ~Complex5() = default;
-};
-
-class Complex6 {
-  int n;
-  int m;
-
-public:
-  Complex6() : n(0) {
-    do {
-      return;
-    } while (!dice());
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a conditional loop
-  }
-
-  ~Complex6() = default;
-};
-
-class Complex7 {
-  int n;
-  int m;
-
-public:
-  Complex7() : n(0) {
-    for (int i = 2; i < 1; ++i) {
-      m = 1;
-    }
-    // NO-MESSAGES: initialization of 'm' is nested into a conditional loop
-  }
-
-  ~Complex7() = default;
-};
-
-class Complex8 {
-  int n;
-  int m;
-
-public:
-  Complex8() : n(0) {
-    for (int i = 0; i < 2; ++i) {
-      return;
-    }
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a conditional loop
-  }
-
-  ~Complex8() = default;
-};
-
-class Complex9 {
-  int n;
-  int m;
-
-public:
-  Complex9() : n(0) {
-    switch (dice()) {
-    case 1:
-      m = 1;
-      // NO-MESSAGES: initialization of 'm' is nested in a conditional expression
-      break;
-    default:
-      break;
-    }
-  }
-
-  ~Complex9() = default;
-};
-
-class Complex10 {
-  int n;
-  int m;
-
-public:
-  Complex10() : n(0) {
-    switch (dice()) {
-    case 1:
-      return;
-      break;
-    default:
-      break;
-    }
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a conditional expression
-  }
-
-  ~Complex10() = default;
-};
-
-class E {};
-int risky(); // may throw
-
-class Complex11 {
-  int n;
-  int m;
-
-public:
-  Complex11() : n(0) {
-    try {
-      risky();
-      m = 1;
-      // NO-MESSAGES: initialization of 'm' follows is nested in a try-block
-    } catch (const E& e) {
-      return;
-    }
-  }
-
-  ~Complex11() = default;
-};
-
-class Complex12 {
-  int n;
-  int m;
-
-public:
-  Complex12() : n(0) {
-    try {
-      risky();
-    } catch (const E& e) {
-      return;
-    }
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a try-block
-  }
-
-  ~Complex12() = default;
-};
-
-class Complex13 {
-  int n;
-  int m;
-
-public:
-  Complex13() : n(0) {
-    return;
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a return statement
-  }
-
-  ~Complex13() = default;
-};
-
-class Complex14 {
-  int n;
-  int m;
-
-public:
-  Complex14() : n(0) {
-    goto X;
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a goto statement
-  X:
-    ;
-  }
-
-  ~Complex14() = default;
-};
-
-void returning();
-
-class Complex15 {
-  int n;
-  int m;
-
-public:
-  Complex15() : n(0) {
-    // CHECK-FIXES: Complex15() : n(0), m(1) {
-    returning();
-    m = 1;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'm' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Complex15() = default;
-};
-
-[[noreturn]] void not_returning();
-
-class Complex16 {
-  int n;
-  int m;
-
-public:
-  Complex16() : n(0) {
-    not_returning();
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a non-returning function call
-  }
-
-  ~Complex16() = default;
-};
-
-class Complex17 {
-  int n;
-  int m;
-
-public:
-  Complex17() : n(0) {
-    throw 1;
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows a 'throw' statement;
-  }
-
-  ~Complex17() = default;
-};
-
-class Complex18 {
-  int n;
-
-public:
-  Complex18() try {
-    n = risky();
-    // NO-MESSAGES: initialization of 'n' in a 'try' body;
-  } catch (const E& e) {
-    n = 0;
-  }
-
-  ~Complex18() = default;
-};
-
-class Complex19 {
-  int n;
-public:
-  Complex19() {
-    // CHECK-FIXES: Complex19() : n(0) {
-    n = 0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  explicit Complex19(int) {
-    // CHECK-FIXES: Complex19(int) : n(12) {
-    n = 12;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-
-  ~Complex19() = default;
-};
-
-class Complex20 {
-  int n;
-  int m;
-
-public:
-  Complex20(int k) : n(0) {
-    assert(k > 0);
-    m = 1;
-    // NO-MESSAGES: initialization of 'm' follows an assertion
-  }
-
-  ~Complex20() = default;
-};
-
-class VeryComplex1 {
-  int n1, n2, n3;
-  double x1, x2, x3;
-  int n4, n5, n6;
-  double x4, x5, x6;
-
-  VeryComplex1() : n3(something_int()), x3(something_double()),
-                   n5(something_int()), x4(something_double()),
-                   x5(something_double()) {
-    // CHECK-FIXES: VeryComplex1() : n2(something_int()), n1(something_int()), n3(something_int()), x2(something_double()), x1(something_double()), x3(something_double()),
-    // CHECK-FIXES:                  n4(something_int()), n5(something_int()), n6(something_int()), x4(something_double()),
-    // CHECK-FIXES:                  x5(something_double()), x6(something_double()) {
-
-// FIXME: Order of elements on the constructor initializer list should match
-//        the order of the declaration of the fields. Thus the correct fixes
-//        should look like these:
-//
-    // C ECK-FIXES: VeryComplex1() : n2(something_int()), n1(something_int()), n3(something_int()), x2(something_double()), x1(something_double()), x3(something_double()),
-    // C ECK-FIXES:                  n4(something_int()), n5(something_int()), n6(something_int()), x4(something_double()),
-    // C ECK-FIXES:                  x5(something_double()), x6(something_double()) {
-//
-//        However, the Diagnostics Engine processes fixes in the order of the
-//        diagnostics and insertions to the same position are handled in left to
-//        right order thus in the case two adjacent fields are initialized
-//        inside the constructor in reverse order the provided fix is a
-//        constructor initializer list that does not match the order of the
-//        declaration of the fields.
-
-    x2 = something_double();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x2' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    n2 = something_int();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n2' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x6 = something_double();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x6' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    x1 = something_double();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'x1' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    n6 = something_int();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n6' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    n1 = something_int();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n1' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-    n4 = something_int();
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'n4' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  }
-};
-
-struct Outside {
-  int n;
-  double x;
-  Outside();
-};
-
-Outside::Outside() {
-    // CHECK-FIXES: Outside::Outside() : n(1), x(1.0) {
-  n = 1;
-    // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'n' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-  x = 1.0;
-    // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'x' should be initialized in a member initializer of the constructor [cppcoreguidelines-prefer-member-initializer]
-    // CHECK-FIXES: {{^\ *$}}
-}

From da7fa7457800394d610e8cbd6befe7bc944ca7d0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 21 Feb 2021 16:24:18 +0100
Subject: [PATCH 162/244] [JumpThreading] Clone noalias.scope.decl when
 threading blocks

When cloning instructions during jump threading, also clone and
adapt any declared scopes. This is primarily important when
threading loop exits, because we'll end up with two dominating
scope declarations in that case (at least after additional loop
rotation). This addresses a loose thread from
https://reviews.llvm.org/rG2556b413a7b8#975012.

Differential Revision: https://reviews.llvm.org/D97154

(cherry picked from commit 5e7e499b912d2c9ebaa91b5783ca123dbedeabcc)
---
 llvm/include/llvm/Transforms/Utils/Cloning.h  |  7 +++
 llvm/lib/Transforms/Scalar/JumpThreading.cpp  | 10 +++
 llvm/lib/Transforms/Utils/CloneFunction.cpp   |  8 +++
 .../JumpThreading/noalias-scope-decl.ll       | 63 +++++++++++++++++++
 4 files changed, 88 insertions(+)
 create mode 100644 llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 56aaa5d48e2a..aa960c625630 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -274,6 +274,13 @@ void updateProfileCallee(
 void identifyNoAliasScopesToClone(
     ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
 
+/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
+/// instruction range and extract their scope. These are candidates for
+/// duplication when cloning.
+void identifyNoAliasScopesToClone(
+    BasicBlock::iterator Start, BasicBlock::iterator End,
+    SmallVectorImpl<MDNode *> &NoAliasDeclScopes);
+
 /// Duplicate the specified list of noalias decl scopes.
 /// The 'Ext' string is added as an extension to the name.
 /// Afterwards, the ClonedScopes contains the mapping of the original scope
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 96aef90c1c1a..10b08b4e2224 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -2076,6 +2076,15 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
     ValueMapping[PN] = NewPN;
   }
 
+  // Clone noalias scope declarations in the threaded block. When threading a
+  // loop exit, we would otherwise end up with two identical scope declarations
+  // visible at the same time.
+  SmallVector<MDNode *> NoAliasScopes;
+  DenseMap<MDNode *, MDNode *> ClonedScopes;
+  LLVMContext &Context = PredBB->getContext();
+  identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);
+  cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
+
   // Clone the non-phi instructions of the source basic block into NewBB,
   // keeping track of the mapping and using it to remap operands in the cloned
   // instructions.
@@ -2084,6 +2093,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
     New->setName(BI->getName());
     NewBB->getInstList().push_back(New);
     ValueMapping[&*BI] = New;
+    adaptNoAliasScopes(New, ClonedScopes, Context);
 
     // Remap operands to patch up intra-block references.
     for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 51a49574e55d..6ab061510a60 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -989,3 +989,11 @@ void llvm::identifyNoAliasScopesToClone(
       if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
         NoAliasDeclScopes.push_back(Decl->getScopeList());
 }
+
+void llvm::identifyNoAliasScopesToClone(
+    BasicBlock::iterator Start, BasicBlock::iterator End,
+    SmallVectorImpl<MDNode *> &NoAliasDeclScopes) {
+  for (Instruction &I : make_range(Start, End))
+    if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+      NoAliasDeclScopes.push_back(Decl->getScopeList());
+}
diff --git a/llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll b/llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll
new file mode 100644
index 000000000000..b032afaaf313
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/noalias-scope-decl.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+define void @test(i8* %ptr) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !0)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[I]], 100
+; CHECK-NEXT:    br i1 [[C]], label [[EXIT:%.*]], label [[LATCH]]
+; CHECK:       latch:
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !3)
+; CHECK-NEXT:    store i8 0, i8* [[PTR:%.*]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 1, i8* [[PTR]], align 1, !noalias !3
+; CHECK-NEXT:    [[I_INC]] = add i32 [[I]], 1
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK-NEXT:    store i8 0, i8* [[PTR]], align 1, !noalias !0
+; CHECK-NEXT:    store i8 1, i8* [[PTR]], align 1, !noalias !5
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.experimental.noalias.scope.decl(metadata !0)
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %latch ]
+  %c = icmp eq i32 %i, 100
+  br i1 %c, label %if, label %latch
+
+if:
+  br label %latch
+
+latch:
+  %p = phi i1 [ true, %if ], [ false, %loop ]
+  call void @llvm.experimental.noalias.scope.decl(metadata !3)
+  store i8 0, i8* %ptr, !noalias !0
+  store i8 1, i8* %ptr, !noalias !3
+  %i.inc = add i32 %i, 1
+  br i1 %p, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+declare void @llvm.experimental.noalias.scope.decl(metadata)
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"scope1"}
+!2 = distinct !{!2, !"domain"}
+!3 = !{!4}
+!4 = distinct !{!4, !2, !"scope2"}
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2, !"scope1"}
+; CHECK: !2 = distinct !{!2, !"domain"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2, !"scope2"}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2, !"scope2:thread"}

From a92ceea91116e7b95d23eff634507fa2cff86ef2 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 22 Feb 2021 17:35:09 -0800
Subject: [PATCH 163/244] Revert "[llvm-cov] reset executation count to 0 after
 wrapped segment"

This reverts commit e3df9471750935876bd2bf7da93ccf0eacca8592.

This commit caused regressions in coverage generation for both Rust and
Swift.  We're reverting this in the release/12.x branch until we have
a proper fix in trunk.

http://llvm.org/PR49297
---
 llvm/lib/ProfileData/Coverage/CoverageMapping.cpp   | 1 -
 llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h  | 2 +-
 llvm/test/tools/llvm-cov/ignore-filename-regex.test | 4 ++--
 llvm/unittests/ProfileData/CoverageMappingTest.cpp  | 2 +-
 4 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index a8cc308b4e3a..cdbcde50d33a 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -794,7 +794,6 @@ LineCoverageStats::LineCoverageStats(
     ExecutionCount = WrappedSegment->Count;
   if (!MinRegionCount)
     return;
-  ExecutionCount = 0;
   for (const auto *LS : LineSegments)
     if (isStartOfRegion(LS))
       ExecutionCount = std::max(ExecutionCount, LS->Count);
diff --git a/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h b/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h
index d224fd0d00ea..07941f9bb497 100644
--- a/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h
+++ b/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h
@@ -12,7 +12,7 @@ template <class T> T FOO<T>::DoIt(T ti) { // HEADER: [[@LINE]]|  2|template
   for (T I = 0; I < ti; I++) {            // HEADER: [[@LINE]]| 22|  for (T
     t += I;                               // HEADER: [[@LINE]]| 20|   t += I;
     if (I > ti / 2)                       // HEADER: [[@LINE]]| 20|   if (I > ti
-      t -= 1;                             // HEADER: [[@LINE]]|  8|     t -= 1;
+      t -= 1;                             // HEADER: [[@LINE]]| 20|     t -= 1;
   }                                       // HEADER: [[@LINE]]| 20| }
                                           // HEADER: [[@LINE]]|  2|
   return t;                               // HEADER: [[@LINE]]|  2|  return t;
diff --git a/llvm/test/tools/llvm-cov/ignore-filename-regex.test b/llvm/test/tools/llvm-cov/ignore-filename-regex.test
index efc4cda4abc0..aea9e4646776 100644
--- a/llvm/test/tools/llvm-cov/ignore-filename-regex.test
+++ b/llvm/test/tools/llvm-cov/ignore-filename-regex.test
@@ -22,7 +22,7 @@ REPORT_IGNORE_DIR-NOT: {{.*}}extra{{[/\\]}}dec.h{{.*}}
 REPORT_IGNORE_DIR-NOT: {{.*}}extra{{[/\\]}}inc.h{{.*}}
 REPORT_IGNORE_DIR: {{.*}}abs.h{{.*}}
 REPORT_IGNORE_DIR: {{.*}}main.cc{{.*}}
-REPORT_IGNORE_DIR: {{^}}TOTAL 5{{.*}}90.00%{{$}}
+REPORT_IGNORE_DIR: {{^}}TOTAL 5{{.*}}100.00%{{$}}
 
 # Ignore all files from "extra" directory even when SOURCES specified.
 RUN: llvm-cov report -instr-profile %S/Inputs/sources_specified/main.profdata \
@@ -35,7 +35,7 @@ REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}extra{{[/\\]}}dec.h{{.*}}
 REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}extra{{[/\\]}}inc.h{{.*}}
 REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}main.cc{{.*}}
 REPORT_IGNORE_DIR_WITH_SOURCES: {{.*}}abs.h{{.*}}
-REPORT_IGNORE_DIR_WITH_SOURCES: {{^}}TOTAL 4{{.*}}80.00%{{$}}
+REPORT_IGNORE_DIR_WITH_SOURCES: {{^}}TOTAL 4{{.*}}100.00%{{$}}
 
 ########################
 # Test "show" command.
diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp
index 43386d23883e..4854b7f1454c 100644
--- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp
+++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp
@@ -675,7 +675,7 @@ TEST_P(CoverageMappingTest, test_line_coverage_iterator) {
   CoverageData Data = LoadedCoverage->getCoverageForFile("file1");
 
   unsigned Line = 0;
-  unsigned LineCounts[] = {20, 20, 20, 20, 10, 10, 10, 10, 10, 0, 0};
+  unsigned LineCounts[] = {20, 20, 20, 20, 30, 10, 10, 10, 10, 0, 0};
   for (const auto &LCS : getLineCoverageStats(Data)) {
     ASSERT_EQ(Line + 1, LCS.getLine());
     errs() << "Line: " << Line + 1 << ", count = " << LCS.getExecutionCount() << "\n";

From 99df95fd910becbcf89dd6f17f1e259353a72d27 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Wed, 3 Feb 2021 12:45:46 +0100
Subject: [PATCH 164/244] [clang][CodeComplete] Fix crash on ParenListExprs

Fixes https://github.com/clangd/clangd/issues/676.

Differential Revision: https://reviews.llvm.org/D95935
---
 clang/lib/Sema/SemaCodeComplete.cpp            | 18 ++++++++++++++++--
 .../test/CodeCompletion/function-overloads.cpp |  6 ++++++
 clang/test/CodeCompletion/member-access.c      |  7 +++++++
 clang/unittests/Sema/CodeCompleteTest.cpp      |  1 +
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index c2785fd60fc2..40ea0f5d24b3 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -5168,6 +5168,15 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
   if (!Base || !CodeCompleter)
     return;
 
+  // Peel off the ParenListExpr by chosing the last one, as they don't have a
+  // predefined type.
+  if (auto *PLE = llvm::dyn_cast<ParenListExpr>(Base))
+    Base = PLE->getExpr(PLE->getNumExprs() - 1);
+  if (OtherOpBase) {
+    if (auto *PLE = llvm::dyn_cast<ParenListExpr>(OtherOpBase))
+      OtherOpBase = PLE->getExpr(PLE->getNumExprs() - 1);
+  }
+
   ExprResult ConvertedBase = PerformMemberExprBaseConversion(Base, IsArrow);
   if (ConvertedBase.isInvalid())
     return;
@@ -5597,12 +5606,17 @@ ProduceSignatureHelp(Sema &SemaRef, Scope *S,
 QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn,
                                         ArrayRef<Expr *> Args,
                                         SourceLocation OpenParLoc) {
-  if (!CodeCompleter)
+  if (!CodeCompleter || !Fn)
     return QualType();
 
+  // If we have a ParenListExpr for LHS, peel it off by chosing the last expr.
+  // As ParenListExprs don't have a predefined type.
+  if (auto *PLE = llvm::dyn_cast<ParenListExpr>(Fn))
+    Fn = PLE->getExpr(PLE->getNumExprs() - 1);
+
   // FIXME: Provide support for variadic template functions.
   // Ignore type-dependent call expressions entirely.
-  if (!Fn || Fn->isTypeDependent() || anyNullArguments(Args))
+  if (Fn->isTypeDependent() || anyNullArguments(Args))
     return QualType();
   // In presence of dependent args we surface all possible signatures using the
   // non-dependent args in the prefix. Afterwards we do a post filtering to make
diff --git a/clang/test/CodeCompletion/function-overloads.cpp b/clang/test/CodeCompletion/function-overloads.cpp
index 11c864c28107..7b8ccef1d580 100644
--- a/clang/test/CodeCompletion/function-overloads.cpp
+++ b/clang/test/CodeCompletion/function-overloads.cpp
@@ -21,6 +21,8 @@ namespace NS {
 void test_adl() {
   NS::X x;
   g(x, x);
+  (void)(f)(1, 2, 3);
+  (void)(test, test, test, f)(1, 2, 3);
 }
 
 // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:10:9 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
@@ -31,6 +33,10 @@ void test_adl() {
 // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:10:21 %s -o - | FileCheck -check-prefix=CHECK-CC4 %s
 // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:23:7 %s -o - | \
 // RUN:    FileCheck -check-prefix=CHECK-CC5 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:24:13 %s -o - | \
+// RUN:    FileCheck -check-prefix=CHECK-CC1 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:25:31 %s -o - | \
+// RUN:    FileCheck -check-prefix=CHECK-CC1 %s
 // CHECK-CC1: OVERLOAD: [#int#]f(<#float x#>, float y)
 // CHECK-CC1: OVERLOAD: [#int#]f(<#int i#>)
 // CHECK-CC1-NOT, CHECK-CC2-NOT: OVERLOAD: A(
diff --git a/clang/test/CodeCompletion/member-access.c b/clang/test/CodeCompletion/member-access.c
index 72afbf2ff947..545349f71731 100644
--- a/clang/test/CodeCompletion/member-access.c
+++ b/clang/test/CodeCompletion/member-access.c
@@ -29,3 +29,10 @@ void test3(struct Point2 *p) {
 
 // RUN: %clang_cc1 -fsyntax-only -code-completion-with-fixits -code-completion-at=%s:24:5 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
 // CHECK-CC3: x (requires fix-it: {24:4-24:5} to "->")
+
+void test4(struct Point *p) {
+  (int)(p)->x;
+  (int)(0,1,2,3,4,p)->x;
+}
+// RUN: %clang_cc1 -fsyntax-only -code-completion-with-fixits -code-completion-at=%s:34:13 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
+// RUN: %clang_cc1 -fsyntax-only -code-completion-with-fixits -code-completion-at=%s:35:23 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
diff --git a/clang/unittests/Sema/CodeCompleteTest.cpp b/clang/unittests/Sema/CodeCompleteTest.cpp
index d8b303d77bb9..dae0793658c5 100644
--- a/clang/unittests/Sema/CodeCompleteTest.cpp
+++ b/clang/unittests/Sema/CodeCompleteTest.cpp
@@ -488,6 +488,7 @@ TEST(PreferredTypeTest, NoCrashOnInvalidTypes) {
     auto y = new decltype(&1)(^);
     // GNU decimal type extension is not supported in clang.
     auto z = new _Decimal128(^);
+    void foo() { (void)(foo)(^); }
   )cpp";
   EXPECT_THAT(collectPreferredTypes(Code), Each("NULL TYPE"));
 }

From 7fc6c60608e416e7f8f5c194768c6dd511449c1b Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Thu, 18 Feb 2021 13:48:43 +0100
Subject: [PATCH 165/244] [clang][CodeComplete] Ensure there are no crashes
 when completing with ParenListExprs as LHS

Differential Revision: https://reviews.llvm.org/D96950
---
 clang/lib/Sema/SemaCodeComplete.cpp | 31 ++++++++++++++++-------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 40ea0f5d24b3..be04970979b3 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -5158,6 +5158,20 @@ class ConceptInfo {
 
   llvm::DenseMap<const IdentifierInfo *, Member> Results;
 };
+
+// If \p Base is ParenListExpr, assume a chain of comma operators and pick the
+// last expr. We expect other ParenListExprs to be resolved to e.g. constructor
+// calls before here. (So the ParenListExpr should be nonempty, but check just
+// in case)
+Expr *unwrapParenList(Expr *Base) {
+  if (auto *PLE = llvm::dyn_cast_or_null<ParenListExpr>(Base)) {
+    if (PLE->getNumExprs() == 0)
+      return nullptr;
+    Base = PLE->getExpr(PLE->getNumExprs() - 1);
+  }
+  return Base;
+}
+
 } // namespace
 
 void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
@@ -5165,18 +5179,11 @@ void Sema::CodeCompleteMemberReferenceExpr(Scope *S, Expr *Base,
                                            SourceLocation OpLoc, bool IsArrow,
                                            bool IsBaseExprStatement,
                                            QualType PreferredType) {
+  Base = unwrapParenList(Base);
+  OtherOpBase = unwrapParenList(OtherOpBase);
   if (!Base || !CodeCompleter)
     return;
 
-  // Peel off the ParenListExpr by chosing the last one, as they don't have a
-  // predefined type.
-  if (auto *PLE = llvm::dyn_cast<ParenListExpr>(Base))
-    Base = PLE->getExpr(PLE->getNumExprs() - 1);
-  if (OtherOpBase) {
-    if (auto *PLE = llvm::dyn_cast<ParenListExpr>(OtherOpBase))
-      OtherOpBase = PLE->getExpr(PLE->getNumExprs() - 1);
-  }
-
   ExprResult ConvertedBase = PerformMemberExprBaseConversion(Base, IsArrow);
   if (ConvertedBase.isInvalid())
     return;
@@ -5606,14 +5613,10 @@ ProduceSignatureHelp(Sema &SemaRef, Scope *S,
 QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn,
                                         ArrayRef<Expr *> Args,
                                         SourceLocation OpenParLoc) {
+  Fn = unwrapParenList(Fn);
   if (!CodeCompleter || !Fn)
     return QualType();
 
-  // If we have a ParenListExpr for LHS, peel it off by chosing the last expr.
-  // As ParenListExprs don't have a predefined type.
-  if (auto *PLE = llvm::dyn_cast<ParenListExpr>(Fn))
-    Fn = PLE->getExpr(PLE->getNumExprs() - 1);
-
   // FIXME: Provide support for variadic template functions.
   // Ignore type-dependent call expressions entirely.
   if (Fn->isTypeDependent() || anyNullArguments(Args))

From 1c0a0c727eaeee7d7283f9dabe861e69881764c4 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 23 Feb 2021 11:12:50 -0800
Subject: [PATCH 166/244] [12.0.0][llvm-symbolizer][test] Fix test broken after
 cherry-pick

See bug https://bugs.llvm.org/show_bug.cgi?id=49227. The cherry-pick 0d4f8a3f364f introduced a test failure, as the test included use of a feature that was only recently added to lit and isn't in the release branch. This patch fixes up the test to manage without this lit change.

Reviewed By: tstellar, MaskRay

Differential Revision: https://reviews.llvm.org/D97272
---
 llvm/test/tools/llvm-symbolizer/output-style-inlined.test | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test
index 1b8e3a2f22fb..103c2afc176e 100644
--- a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test
+++ b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test
@@ -33,17 +33,17 @@ GNU: inctwo
 ## is specified, but a file doesn't exist. Check we report an error.
 
 RUN: llvm-symbolizer --output-style=GNU --obj=%p/Inputs/not.exist 0x1 0x2 --no-inlines 2>&1 \
-RUN:   | FileCheck %s --check-prefix=NOT-EXIST-GNU -DMSG=%errc_ENOENT
+RUN:   | FileCheck %s --check-prefix=NOT-EXIST-GNU
 RUN: llvm-symbolizer --output-style=LLVM --obj=%p/Inputs/not.exist 0x1 0x2 --no-inlines 2>&1 \
-RUN:   | FileCheck %s --check-prefix=NOT-EXIST-LLVM -DMSG=%errc_ENOENT
+RUN:   | FileCheck %s --check-prefix=NOT-EXIST-LLVM
 
-# NOT-EXIST-GNU:      LLVMSymbolizer: error reading file: [[MSG]]
+# NOT-EXIST-GNU:      LLVMSymbolizer: error reading file: {{[Nn]}}o such file or directory
 # NOT-EXIST-GNU-NEXT: ??
 # NOT-EXIST-GNU-NEXT: ??:0
 # NOT-EXIST-GNU-NEXT: ??
 # NOT-EXIST-GNU-NEXT: ??:0
 
-# NOT-EXIST-LLVM:       LLVMSymbolizer: error reading file: [[MSG]]
+# NOT-EXIST-LLVM:       LLVMSymbolizer: error reading file: {{[Nn]}}o such file or directory
 # NOT-EXIST-LLVM-NEXT:  ??
 # NOT-EXIST-LLVM-NEXT:  ??:0:0
 # NOT-EXIST-LLVM-EMPTY:

From eccac5a8aec92c995f0f8ef090ba4142e0334b46 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <andrew.kaylor@intel.com>
Date: Wed, 3 Feb 2021 18:16:04 -0800
Subject: [PATCH 167/244] Add auto-upgrade support for annotation intrinsics

The llvm.ptr.annotation and llvm.var.annotation intrinsics were changed
since the 11.0 release to add an additional parameter. This patch
auto-upgrades IR containing the four-parameter versions of these
intrinsics, adding a null pointer as the fifth argument.

Differential Revision: https://reviews.llvm.org/D95993

(cherry picked from commit 9a827906cb95e7c3ae94627558da67b47ffde249)
---
 llvm/lib/IR/AutoUpgrade.cpp                   |  42 ++++++++++++++++
 llvm/test/Bitcode/upgrade-ptr-annotation.ll   |  45 ++++++++++++++++++
 .../test/Bitcode/upgrade-ptr-annotation.ll.bc | Bin 0 -> 1524 bytes
 llvm/test/Bitcode/upgrade-var-annotation.ll   |  15 ++++++
 .../test/Bitcode/upgrade-var-annotation.ll.bc | Bin 0 -> 1232 bytes
 5 files changed, 102 insertions(+)
 create mode 100644 llvm/test/Bitcode/upgrade-ptr-annotation.ll
 create mode 100644 llvm/test/Bitcode/upgrade-ptr-annotation.ll.bc
 create mode 100644 llvm/test/Bitcode/upgrade-var-annotation.ll
 create mode 100644 llvm/test/Bitcode/upgrade-var-annotation.ll.bc

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 23e7af6287b6..7d83cf5dcf1d 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -937,6 +937,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
         return true;
       }
+    } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
+      rename(F);
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::ptr_annotation,
+                                        F->arg_begin()->getType());
+      return true;
     }
     break;
 
@@ -947,6 +953,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     }
     break;
 
+  case 'v': {
+    if (Name == "var.annotation" && F->arg_size() == 4) {
+      rename(F);
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::var_annotation);
+      return true;
+    }
+    break;
+  }
+
   case 'x':
     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
       return true;
@@ -3730,6 +3746,32 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     CI->eraseFromParent();
     return;
 
+  case Intrinsic::ptr_annotation:
+    // Upgrade from versions that lacked the annotation attribute argument.
+    assert(CI->getNumArgOperands() == 4 &&
+           "Before LLVM 12.0 this intrinsic took four arguments");
+    // Create a new call with an added null annotation attribute argument.
+    NewCall = Builder.CreateCall(
+        NewFn,
+        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
+         CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+    NewCall->takeName(CI);
+    CI->replaceAllUsesWith(NewCall);
+    CI->eraseFromParent();
+    return;
+
+  case Intrinsic::var_annotation:
+    // Upgrade from versions that lacked the annotation attribute argument.
+    assert(CI->getNumArgOperands() == 4 &&
+           "Before LLVM 12.0 this intrinsic took four arguments");
+    // Create a new call with an added null annotation attribute argument.
+    NewCall = Builder.CreateCall(
+        NewFn,
+        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
+         CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
+    CI->eraseFromParent();
+    return;
+
   case Intrinsic::x86_xop_vfrcz_ss:
   case Intrinsic::x86_xop_vfrcz_sd:
     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
diff --git a/llvm/test/Bitcode/upgrade-ptr-annotation.ll b/llvm/test/Bitcode/upgrade-ptr-annotation.ll
new file mode 100644
index 000000000000..aeacc6f1a6ce
--- /dev/null
+++ b/llvm/test/Bitcode/upgrade-ptr-annotation.ll
@@ -0,0 +1,45 @@
+; Test upgrade of ptr.annotation intrinsics.
+;
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; Unused return values
+; The arguments passed to the intrinsic wouldn't normally be arguments to
+; the function, but that makes it easier to test that they are handled
+; correctly.
+define void @f1(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3) {
+;CHECK: @f1(i8* [[ARG0:%.*]], i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i32 [[ARG3:%.*]])
+  %t0 = call i8* @llvm.ptr.annotation.p0i8(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3)
+;CHECK:  call i8* @llvm.ptr.annotation.p0i8(i8* [[ARG0]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null)
+
+  %arg0_p16 = bitcast i8* %arg0 to i16*
+  %t1 = call i16* @llvm.ptr.annotation.p0i16(i16* %arg0_p16, i8* %arg1, i8* %arg2, i32 %arg3)
+;CHECK:  [[ARG0_P16:%.*]] = bitcast
+;CHECK:  call i16* @llvm.ptr.annotation.p0i16(i16* [[ARG0_P16]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null)
+
+  %arg0_p256 = bitcast i8* %arg0 to i256*
+  %t2 = call i256* @llvm.ptr.annotation.p0i256(i256* %arg0_p256, i8* %arg1, i8* %arg2, i32 %arg3)
+;CHECK:  [[ARG0_P256:%.*]] = bitcast
+;CHECK:  call i256* @llvm.ptr.annotation.p0i256(i256* [[ARG0_P256]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null)
+  ret void
+}
+
+; Used return values
+define i16* @f2(i16* %x, i16* %y) {
+  %t0 = call i16* @llvm.ptr.annotation.p0i16(i16* %x, i8* undef, i8* undef, i32 undef)
+  %t1 = call i16* @llvm.ptr.annotation.p0i16(i16* %y, i8* undef, i8* undef, i32 undef)
+  %cmp = icmp ugt i16* %t0, %t1
+  %sel = select i1 %cmp, i16* %t0, i16* %t1
+  ret i16* %sel
+; CHECK:  [[T0:%.*]] = call i16* @llvm.ptr.annotation.p0i16(i16* %x, i8* undef, i8* undef, i32 undef, i8* null)
+; CHECK:  [[T1:%.*]] = call i16* @llvm.ptr.annotation.p0i16(i16* %y, i8* undef, i8* undef, i32 undef, i8* null)
+; CHECK:  %cmp = icmp ugt i16* [[T0]], [[T1]]
+; CHECK:  %sel = select i1 %cmp, i16* [[T0]], i16* [[T1]]
+; CHECK:  ret i16* %sel
+}
+
+declare i8*   @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32)
+; CHECK: declare i8*   @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*)
+declare i16*  @llvm.ptr.annotation.p0i16(i16*, i8*, i8*, i32)
+; CHECK: declare i16*   @llvm.ptr.annotation.p0i16(i16*, i8*, i8*, i32, i8*)
+declare i256* @llvm.ptr.annotation.p0i256(i256*, i8*, i8*, i32)
+; CHECK: declare i256*   @llvm.ptr.annotation.p0i256(i256*, i8*, i8*, i32, i8*)
diff --git a/llvm/test/Bitcode/upgrade-ptr-annotation.ll.bc b/llvm/test/Bitcode/upgrade-ptr-annotation.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..5db0810ff88515d1fc40abe7fc20275d8cfb83aa
GIT binary patch
literal 1524
zcmZ`(Z%h+s7=QcsPAJzK64#Dva|M=kPVri#UTM3uJ*U-?Sa2U$CRwl4iVNcLXIOAC
zN3Z5^>xUwkbQv)=AtruU(q(AOGz;aBG1nM$ZgCqUG*om34O3^#X5!vf_wR#Ga_>Fw
zd(S=Z@ArG&=k+p^3-uKMr~m*#4OO%Aov*^Jf1ce{+Qc`?;F?JTfDbyQaubll`8xO@
zgVn>HS>+BjZnYnm#Wb&~P`y$4ageWgQ|>)p>K(_e6*Q`QtHjzo<m0AI)mzKOIPN(v
z56z;VSCGuMA@UoUwK=9+Z(g6Ue3;%dOkSFWd5@6*Kq$ak<y%9Bjw(m0#$#|;ilq`2
z<&7W3OJn1WxW5|oDAA*KIb~MiUQJbx692?H)`On#q9*m3=CSH&(zYE`<#+ye5*HTT
zlY|E}=>Sl`yp{3Ar_d0${131Lt`+#LjRNqBsS*34;mHQ=$hRBm)`gM_zZ@J$o;-4)
zwc}Bu1V>@iKPJ;PuaOsY_}fUpte5ZR!M{<;fbY^?4PMM%jN*aj%pJGzzZiE=a5tR>
zfNz2Nj=-=wlqaThM7Toc8e-^}6!-E-e+21Cc4?M@_AZV+<gq%9*Ep3iTuBjON>vOK
zDzggLEaQqRT-ivxmqQMtNWV|yIAnlR^hHFD6!qDcu$2UsqqLorc4-w`^<#DTv|+(S
z+%OrYSYwzv=C~+0<{0~g!g*0}#uSe1YDHfFIoytHX&1ZU$ctP;<N`>a8R>hhf@u%&
z;(Msrr4zf8Vt;aQ!@X7P-Xiw<B(_2+g0m?ipCYowah@eEn~3Q_!(~?eLxQo-GWKL~
zJTK9s3_YZ9j7po}rca`d>k3C&(tN#N<XXgTPUvk%A_1{iC-$Ba^;dRl?*Z+7KbB8m
zOKxqB#8w~+(rlQ|7^ghOYbIiT&^W(H<a35QGGkcRM5mdCF~O1b*E}Z@5D!I=NV2K%
zVM1F_!1%uLGE3Yx85S~zBQj$Fu*tB>s^2~;&{@fmW?WE^CsWQm;~0WdDSoL0G8GR&
zJSZ|?M!J)OdzNr)WfHsT#~x7CnQI<nKKJsCoM8b1@aY|C!8IXN#w0q<*t3%Je8xU2
zL5|L>Z=wrDAgV4XV2h~#c?MevXzwnnS4r(nN_&&kKJsIoIJQJ#aQa|T{p%u@_hXMK
zCij!cu$&@RWkuxdw(CDiG%QwH+H`|y$TAH>Oao*+$Gic~z_YD~XR5TM+YD=k476}b
z#swu$Gxj+tx=hTZ%5HlKM`_t)c&5R+%}W97mLE#1%@+jL-tuD))BGUPZ!T6oB0|&!
z-YwjiL{G>Z<AQ6n&{J1&0Mzh}WS!%JV+<Bl;W{NaVQ~{up}$xX_7{AwRc3}{a-P(g
z5ew>Bw{JV_5M_q<M5DU<5l4^*B&zn86WdZbHz<Sgr+F~!dAjSfccawRb7bRLON~1;
z_q6TSm-Xn#0hJQhPv~)X=z5<U46~JT+18oDPtaKM*&Vf3>Dd6G74NK?Flf(0zgmrg
zXkbzoBbCoxVwyswGB2tAu1vr?a7<a`a*jf?ZEL2sej25|pP_uyK~-k2T2^2QKo|M!
zF0o>GRSYEMI``|v^;X!&uR*U|F}kI|J8TJCJ39If)N;MuwQOhS!Ctm^|G~~$j@nPZ
zVBKPS!3S%-&0?vw)KX*}OIvAc-4<KW)@BU`ZS`zjFj!AhZ5ArDImBA5c3X%Jg)JX&
U`?}fC-i>hcjsMxaqoauRFGh#!<^TWy

literal 0
HcmV?d00001

diff --git a/llvm/test/Bitcode/upgrade-var-annotation.ll b/llvm/test/Bitcode/upgrade-var-annotation.ll
new file mode 100644
index 000000000000..30f692cd8db8
--- /dev/null
+++ b/llvm/test/Bitcode/upgrade-var-annotation.ll
@@ -0,0 +1,15 @@
+; Test upgrade of var.annotation intrinsics.
+;
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+
+define void @f(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3) {
+;CHECK: @f(i8* [[ARG0:%.*]], i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i32 [[ARG3:%.*]])
+  call void @llvm.var.annotation(i8* %arg0, i8* %arg1, i8* %arg2, i32 %arg3)
+;CHECK:  call void @llvm.var.annotation(i8* [[ARG0]], i8* [[ARG1]], i8* [[ARG2]], i32 [[ARG3]], i8* null)
+  ret void
+}
+
+; Function Attrs: nofree nosync nounwind willreturn
+declare void @llvm.var.annotation(i8*, i8*, i8*, i32)
+; CHECK: declare void @llvm.var.annotation(i8*, i8*, i8*, i32, i8*)
diff --git a/llvm/test/Bitcode/upgrade-var-annotation.ll.bc b/llvm/test/Bitcode/upgrade-var-annotation.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..c5f88855fb171b5a787b5637ee62834e30edeca4
GIT binary patch
literal 1232
zcmYLJaZDRk7=NXOyFmBu1f1=-Hg{thvJ7@DgI#HBD7O(n5{vo=CL?lPSD9gObhQ*P
zOG~?5r;RCtKl%qTBjF$as}aqT5kong6`KW@#h^pdhQg*{q;a?`N}}&N7r*3v@4fH6
z``+*SecyL&{L*GqH2|dm0GF0M*7fc;p1!}odb6S@*sVe~MhgH$i;5Bkl%V}#G_X+Z
zlq0R_EF;a98>)!*)lyt<)O_L!YR{Im-Kc1Dljdp;*S%40Hb*<e6{hw~r4%C_H%i=T
z{EKRuZ;#U7tIXzz?s(ntxap(hxhYysBfr;a0MK*5d^C;I?yH(<W<G+{R%9+-UD<st
zRuOS`lT<CZtidmQ3bb9S&RVEJ_j_~zsL@;#%l?A|Yc2T;bRwIAziJYIBTP5^vgP0e
zG5tgRMDJ$#?O%t-6Q5nV-8c9&UXJ7F)(Z&&-CLzqDoyJP10X?VjppMn^vsG&F;1h=
zhzF^a9ObE2*`quckz;K^Y%G9XPK;=G0P&cF`$5>KZ8I*X40q=!4_jKi7Ou&t?P=Z~
zQ`<9vm`}teaBQqY5=CrWRQm&xNK5+6OK>+1b1X5$5?cjWaKc7%#jwdx>x^MpFnZYQ
z*2S=OjkheQZHr-BL~YF!s{Q@g!~k}BK=L9Tl*G6s_GA7!%>S$uxg8Bk@8QyjPVy$C
zu|(*^UIFf9;h#%zmsPvcb5wqg$`tj6K&>*=O31J(l>HRvEot78DC%1>H^XyLwRJ{5
zi5@+ewBA=+ld|@^i;~zYdBw1A01Na>KAq&dDe3RNP3!^U4=2pW;Z`e=qv0-!g03@c
zq>RfB<2{Dj2pKoBR6b|;U1jv>dblLtG8?vLoX7qPq+c2hV1YzW_kNsE7%+ZdTotHC
zjA1inxS}#DfEhzUD0}B>n9Io4ByUHA94y=Nyfuopa_rl36sa_d@Zi{Z9p+7h&TWx!
zcL{Dg;S;ttb<bhU=U!RQ88#7s4!t!Qwl9QhA~F}_Eg9K%D`iQ`C`MbRV_^ix5ULSG
zV6UWqa0l-86OXfH1)A7qiEWyA>V!ii++rcxKFOB-mW6pIe8%#*Ul_yA992*ikqhnj
zf0j9vtfYK$oo~tTEm6J&MZLzq4n9Y9sz)_dQSPlnxx&VK#RP9hlqY%1nmoBf-I+V|
z$f1<d4r6%E!QoR|{qUg^QA^|%f{BMtxStG$u(7&g<^vK!t;F3c&dS_^$~qsm&nW%z
zBMCqmnz4*+K5U&u302!~hHWUhaarjVfd=3ZnyWOasH!AL>&Q|Q9=fJh&Xn(7YXwt|
z2S9bE{gOsQ>c4)OY;|uSyCNg>^By!q@@NEvm0v|s`cOk3L90sj%%WoO3=aA)HTVT@
zgD^BS>=S$!hlfn222%q|Hwv7YGdG@Yb~T?jyIjppLZi#o#Ifg1toxK(FqtjQZqDs7
XeJH-~72Fr<k(2uWoiq#%D*pck%7K{T

literal 0
HcmV?d00001


From 06e5dec59e0b53c5a99fc8d2ff30960b24c4f1b6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 23 Feb 2021 15:57:13 -0800
Subject: [PATCH 168/244] Fix test failures after
 a92ceea91116e7b95d23eff634507fa2cff86ef2

---
 llvm/test/tools/llvm-cov/branch-c-general.test    | 12 ++++++------
 llvm/test/tools/llvm-cov/branch-logical-mixed.cpp |  4 ++--
 llvm/test/tools/llvm-cov/branch-noShowBranch.test |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/test/tools/llvm-cov/branch-c-general.test b/llvm/test/tools/llvm-cov/branch-c-general.test
index bbebdd19fbae..33c12d611992 100644
--- a/llvm/test/tools/llvm-cov/branch-c-general.test
+++ b/llvm/test/tools/llvm-cov/branch-c-general.test
@@ -118,18 +118,18 @@
 // REPORT-NEXT: ---
 // REPORT-NEXT: simple_loops                      8       0 100.00%         9       0 100.00%         6       0 100.00%
 // REPORT-NEXT: conditionals                     24       0 100.00%        15       0 100.00%        16       2  87.50%
-// REPORT-NEXT: early_exits                      20       4  80.00%        25       3  88.00%        16       6  62.50%
-// REPORT-NEXT: jumps                            39      12  69.23%        48       4  91.67%        26       9  65.38%
-// REPORT-NEXT: switches                         28       5  82.14%        38       5  86.84%        30       9  70.00%
+// REPORT-NEXT: early_exits                      20       4  80.00%        25       2  92.00%        16       6  62.50%
+// REPORT-NEXT: jumps                            39      12  69.23%        48       2  95.83%        26       9  65.38%
+// REPORT-NEXT: switches                         28       5  82.14%        38       4  89.47%        30       9  70.00%
 // REPORT-NEXT: big_switch                       25       1  96.00%        32       0 100.00%        30       6  80.00%
 // REPORT-NEXT: boolean_operators                16       0 100.00%        13       0 100.00%        22       2  90.91%
 // REPORT-NEXT: boolop_loops                     19       0 100.00%        14       0 100.00%        16       2  87.50%
-// REPORT-NEXT: conditional_operator              4       2  50.00%         8       1  87.50%         4       2  50.00%
+// REPORT-NEXT: conditional_operator              4       2  50.00%         8       0 100.00%         4       2  50.00%
 // REPORT-NEXT: do_fallthrough                    9       0 100.00%        12       0 100.00%         6       0 100.00%
 // REPORT-NEXT: main                              1       0 100.00%        16       0 100.00%         0       0   0.00%
 // REPORT-NEXT: c-general.c:static_func           4       0 100.00%         4       0 100.00%         2       0 100.00%
 // REPORT-NEXT: ---
-// REPORT-NEXT: TOTAL                           197      24  87.82%       234      13 94.44%       174      38  78.16%
+// REPORT-NEXT: TOTAL                           197      24  87.82%       234       8 96.58%       174      38  78.16%
 
 // Test file-level report.
 // RUN: llvm-profdata merge %S/Inputs/branch-c-general.proftext -o %t.profdata
@@ -157,7 +157,7 @@
 // HTML-INDEX: <td class='column-entry-green'>
 // HTML-INDEX: 100.00% (12/12)
 // HTML-INDEX: <td class='column-entry-yellow'>
-// HTML-INDEX: 94.44% (221/234)
+// HTML-INDEX: 96.58% (226/234)
 // HTML-INDEX: <td class='column-entry-yellow'>
 // HTML-INDEX: 87.82% (173/197)
 // HTML-INDEX: <td class='column-entry-red'>
diff --git a/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp b/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp
index 107ed7778015..f5f787112446 100644
--- a/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp
+++ b/llvm/test/tools/llvm-cov/branch-logical-mixed.cpp
@@ -84,7 +84,7 @@ int main(int argc, char *argv[])
 
 // REPORT: Name                        Regions    Miss   Cover     Lines    Miss   Cover  Branches    Miss   Cover
 // REPORT-NEXT: ---
-// REPORT-NEXT: _Z4funcii                        77       9  88.31%        68       10  85.29%        80      32  60.00%
+// REPORT-NEXT: _Z4funcii                        77       9  88.31%        68       3  95.59%        80      32  60.00%
 // REPORT-NEXT: main                              1       0 100.00%         5       0 100.00%         0       0   0.00%
 // REPORT-NEXT: ---
-// REPORT-NEXT: TOTAL                            78       9  88.46%        73       10  86.30%        80      32  60.00%
+// REPORT-NEXT: TOTAL                            78       9  88.46%        73       3  95.89%        80      32  60.00%
diff --git a/llvm/test/tools/llvm-cov/branch-noShowBranch.test b/llvm/test/tools/llvm-cov/branch-noShowBranch.test
index a8f12d698933..79069b2f07bf 100644
--- a/llvm/test/tools/llvm-cov/branch-noShowBranch.test
+++ b/llvm/test/tools/llvm-cov/branch-noShowBranch.test
@@ -20,6 +20,6 @@
 // REPORT-NOT: do_fallthrough                    9       0 100.00%        12       0 100.00%         6       0 100.00%
 // REPORT-NOT: main                              1       0 100.00%        16       0 100.00%         0       0   0.00%
 // REPORT-NOT: c-general.c:static_func           4       0 100.00%         4       0 100.00%         2       0 100.00%
-// REPORT: TOTAL                           197      24  87.82%       234       13  94.44%
-// REPORT-NOT: TOTAL                           197      24  87.82%       234       13  94.44%       174      38  78.16%
+// REPORT: TOTAL                           197      24  87.82%       234       8  96.58%
+// REPORT-NOT: TOTAL                           197      24  87.82%       234       8  96.58%       174      38  78.16%
 

From d56d2c8863b6ae3637b6261c32ea9479d8e1e2d6 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 27 Jan 2021 13:08:24 -0500
Subject: [PATCH 169/244] [libc++] Fix extern template test failing on Windows

See https://reviews.llvm.org/D94718#2521489 for details.

(cherry picked from commit 90407b16b1d3e38f1360b6a24ceab801ab9cefc1)
---
 libcxx/test/libcxx/debug/extern-templates.sh.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/libcxx/debug/extern-templates.sh.cpp b/libcxx/test/libcxx/debug/extern-templates.sh.cpp
index d5039d4f3029..b2ed6a63d630 100644
--- a/libcxx/test/libcxx/debug/extern-templates.sh.cpp
+++ b/libcxx/test/libcxx/debug/extern-templates.sh.cpp
@@ -15,7 +15,7 @@
 // UNSUPPORTED: libcpp-has-no-localization
 
 // RUN: %{cxx} %{flags} %{compile_flags} %s %{link_flags} -fPIC -DTU1 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -shared -o %t.lib
-// RUN: %{cxx} %{flags} %{compile_flags} %s %t.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe
+// RUN: cd %T && %{cxx} %{flags} %{compile_flags} %s %basename_t.tmp.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe
 // RUN: %{exec} %t.exe
 
 #include <cassert>

From 4918a3d138b907a571f496661b5367e090e1e8bb Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Thu, 28 Jan 2021 10:46:22 -0500
Subject: [PATCH 170/244] [libc++] Fix extern-templates.sh.cpp test on Linux

(cherry picked from commit bf5941afcda3ac6570ba25165758869287491e0d)
---
 libcxx/test/libcxx/debug/extern-templates.sh.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/libcxx/debug/extern-templates.sh.cpp b/libcxx/test/libcxx/debug/extern-templates.sh.cpp
index b2ed6a63d630..0e19895ba8f0 100644
--- a/libcxx/test/libcxx/debug/extern-templates.sh.cpp
+++ b/libcxx/test/libcxx/debug/extern-templates.sh.cpp
@@ -15,7 +15,7 @@
 // UNSUPPORTED: libcpp-has-no-localization
 
 // RUN: %{cxx} %{flags} %{compile_flags} %s %{link_flags} -fPIC -DTU1 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -shared -o %t.lib
-// RUN: cd %T && %{cxx} %{flags} %{compile_flags} %s %basename_t.tmp.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe
+// RUN: cd %T && %{cxx} %{flags} %{compile_flags} %s ./%basename_t.tmp.lib %{link_flags} -fPIC -DTU2 -D_LIBCPP_DEBUG=1 -fvisibility=hidden -o %t.exe
 // RUN: %{exec} %t.exe
 
 #include <cassert>

From e0e6b1e39e7e402cd74a8bf98a2728efbe38310e Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 24 Feb 2021 09:19:23 -0800
Subject: [PATCH 171/244] ReleaseNotes: add lld/ELF notes

Differential Revision: https://reviews.llvm.org/D97113
---
 lld/docs/ReleaseNotes.rst | 68 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 7c1cbc4a4c4b..24ed23bb2b7d 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -24,13 +24,77 @@ Non-comprehensive list of changes in this release
 ELF Improvements
 ----------------
 
-* ``--error-handling-script`` is added to allow for user-defined handlers upon
+* ``--dependency-file`` has been added. (Similar to ``cc -M -MF``.)
+  (`D82437 <https://reviews.llvm.org/D82437>`_)
+* ``--error-handling-script`` has been added to allow for user-defined handlers upon
   missing libraries. (`D87758 <https://reviews.llvm.org/D87758>`_)
+* ``--exclude-libs`` can now localize defined version symbols and bitcode referenced libcall symbols.
+  (`D94280 <https://reviews.llvm.org/D94280>`_)
+* ``--gdb-index`` now works with DWARF v5 and ``--icf={safe,all}``.
+  (`D85579 <https://reviews.llvm.org/D85579>`_)
+  (`D89751 <https://reviews.llvm.org/D89751>`_)
+* ``--gdb-index --emit-relocs`` can now be used together.
+  (`D94354 <https://reviews.llvm.org/D94354>`_)
+* ``--icf={safe,all}`` conservatively no longer folds text sections with LSDA.
+  Previously ICF on ``-fexceptions`` code could be unsafe.
+  (`D84610 <https://reviews.llvm.org/D84610>`_)
+* ``--icf={safe,all}`` can now fold two sections with relocations referencing aliased symbols.
+  (`D88830 <https://reviews.llvm.org/D88830>`_)
+* ``--lto-pseudo-probe-for-profiling`` has been added.
+  (`D95056 <https://reviews.llvm.org/D95056>`_)
+* ``--no-lto-whole-program-visibility`` has been added.
+  (`D92060 <https://reviews.llvm.org/D92060>`_)
+* ``--oformat-binary`` has been fixed to respect LMA.
+  (`D85086 <https://reviews.llvm.org/D85086>`_)
+* ``--reproduce`` includes ``--lto-sample-profile``, ``--just-symbols``, ``--call-graph-ordering-file``, ``--retain-symbols-file`` files.
+* ``-r --gc-sections`` is now supported.
+  (`D84131 <https://reviews.llvm.org/D84131>`_)
+* A ``-u`` specified symbol will no longer change the binding to ``STB_WEAK``.
+  (`D88945 <https://reviews.llvm.org/D88945>`_)
+* ``--wrap`` support has been improved.
+  + If ``foo`` is not referenced, there is no longer an undefined symbol ``__wrap_foo``.
+  + If ``__real_foo`` is not referenced, there is no longer an undefined symbol ``foo``.
+* ``SHF_LINK_ORDER`` sections can now have zero ``sh_link`` values.
+* ``SHF_LINK_ORDER`` and non-``SHF_LINK_ORDER`` sections can now be mixed within an input section description.
+  (`D84001 <https://reviews.llvm.org/D84001>`_)
+* ``LOG2CEIL`` is now supported in linker scripts.
+  (`D84054 <https://reviews.llvm.org/D84054>`_)
+* ``DEFINED`` has been fixed to check whether the symbol is defined.
+  (`D83758 <https://reviews.llvm.org/D83758>`_)
+* An input section description may now have multiple ``SORT_*``.
+  The matched sections are ordered by radix sort with the keys being ``(SORT*, --sort-section, input order)``.
+  (`D91127 <https://reviews.llvm.org/D91127>`_)
+* Users can now provide a GNU style linker script to convert ``.ctors`` into ``.init_array``.
+  (`D91187 <https://reviews.llvm.org/D91187>`_)
+* An empty output section can now be discarded even if it is assigned to a program header.
+  (`D92301 <https://reviews.llvm.org/D92301>`_)
+* Non-``SHF_ALLOC`` sections now have larger file offsets than ``SHF_ALLOC`` sections.
+  (`D85867 <https://reviews.llvm.org/D85867>`_)
+* Some symbol versioning improvements.
+  + Defined ``foo@@v1`` now resolves undefined ``foo@v1`` (`D92259 <https://reviews.llvm.org/D92259>`_)
+  + Undefined ``foo@v1`` now gets an error (`D92260 <https://reviews.llvm.org/D92260>`_)
+* The AArch64 port now has support for ``STO_AARCH64_VARIANT_PCS`` and ``DT_AARCH64_VARIANT_PCS``.
+  (`D93045 <https://reviews.llvm.org/D93045>`_)
+* The AArch64 port now has support for ``R_AARCH64_LD64_GOTPAGE_LO15``.
+* The PowerPC64 port now detects missing R_PPC64_TLSGD/R_PPC64_TLSLD and disables TLS relaxation.
+  This allows linking with object files produced by very old IBM XL compilers.
+  (`D92959 <https://reviews.llvm.org/D92959>`_)
+* Many PowerPC PC-relative relocations are now supported.
+* ``R_PPC_ADDR24`` and ``R_PPC64_ADDR16_HIGH`` are now supported.
+* powerpcle is now supported. Tested with FreeBSD loader and freestanding.
+  (`D93917 <https://reviews.llvm.org/D93917>`_)
+* RISC-V: the first ``SHT_RISCV_ATTRIBUTES`` section is now retained.
+  (`D86309 <https://reviews.llvm.org/D86309>`_)
+* LTO pipeline now defaults to the new PM if the CMake variable ``ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER`` is on.
+  (`D92885 <https://reviews.llvm.org/D92885>`_)
 
 Breaking changes
 ----------------
 
-* ...
+* A COMMON symbol can now cause the fetch of an archive providing a ``STB_GLOBAL`` definition.
+  This behavior follows GNU ld newer than December 1999.
+  If you see ``duplicate symbol`` errors with the new behavior, check out `PR49226 <https://bugs.llvm.org/show_bug.cgi?id=49226>`_.
+  (`D86142 <https://reviews.llvm.org/D86142>`_)
 
 COFF Improvements
 -----------------

From 98f06b16a313ece593f5711778d7da9037f3a2ef Mon Sep 17 00:00:00 2001
From: Pavel Iliin <Pavel.Iliin@arm.com>
Date: Thu, 25 Feb 2021 22:51:09 +0000
Subject: [PATCH 172/244] [AArch64][Docs] Release notes 12.x on outline atomics

Description for AArch64 -moutline-atomics, -mno-outline-atomics
options added to release notes.

Differential Revision: https://reviews.llvm.org/D97510
---
 clang/docs/ReleaseNotes.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a43cc33988ab..64f737ff488f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -73,6 +73,15 @@ New Compiler Flags
 
 - ...
 
+- AArch64 options ``-moutline-atomics``, ``-mno-outline-atomics`` to enable
+  and disable calls to helper functions implementing atomic operations. These
+  out-of-line helpers like '__aarch64_cas8_relax' will detect at runtime
+  AArch64 Large System Extensions (LSE) availability and either use their
+  atomic instructions, or fall back to an LL/SC loop. These options do not apply
+  if the compilation target supports LSE. Atomic instructions are used directly
+  in that case. The options' behaviour mirrors GCC; the helpers are implemented
+  both in compiler-rt and libgcc.
+
 - -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo
   for contents of a precompiled header in a separate object file. This object
   file needs to be linked in, but its contents do not need to be generated

From c637d4d136fd476d4a7418f5ecb76b80bcb6f8fc Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Tue, 23 Feb 2021 13:20:13 -0500
Subject: [PATCH 173/244] [OpenMP][NVPTX] Fixed a compilation error in
 deviceRTLs caused by unsupported feature in release version of LLVM

`ptx71` is not yet supported in the release version of LLVM. As a result,
the support for CUDA 11.2 and CUDA 11.1 caused a compilation error, as mentioned
in D97004. Since the support in D97004 is just a workaround for the release, and
we will not use it in the near future, using `ptx70` for CUDA 11 is feasible.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D97195

(cherry picked from commit f6c2984a090e78947f75e096d43b476bf2ae73eb)
---
 openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 5478cd3f6aea..806a887cc2d8 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -153,7 +153,7 @@ add_custom_target(omptarget-nvptx-bc)
 # This map is from clang/lib/Driver/ToolChains/Cuda.cpp.
 # The last element is the default case.
 set(cuda_version_list 112 111 110 102 101 100 92 91 90 80)
-set(ptx_feature_list 71 71 70 65 64 63 61 61 60 42)
+set(ptx_feature_list 70 70 70 65 64 63 61 61 60 42)
 # The following two lines of ugly code is not needed when the minimal CMake
 # version requirement is 3.17+.
 list(LENGTH cuda_version_list num_version_supported)

From 692808e5af8338f5a109a64b5b9d75d05ec6f590 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 28 Feb 2021 10:17:10 -0500
Subject: [PATCH 174/244] [InstCombine] avoid infinite loop in demanded bits
 for select

https://llvm.org/PR49205
(cherry picked from commit 9502061bcc86982641772f45b7e7a0eb7437f054)
---
 .../InstCombineSimplifyDemanded.cpp           |  8 +++-
 .../InstCombine/select-imm-canon.ll           | 38 +++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c265516213aa..16efe863779a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -345,10 +345,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         return false;
 
       // Get the constant out of the ICmp, if there is one.
+      // Only try this when exactly 1 operand is a constant (if both operands
+      // are constant, the icmp should eventually simplify). Otherwise, we may
+      // invert the transform that reduces set bits and infinite-loop.
+      Value *X;
       const APInt *CmpC;
       ICmpInst::Predicate Pred;
-      if (!match(I->getOperand(0), m_c_ICmp(Pred, m_APInt(CmpC), m_Value())) ||
-          CmpC->getBitWidth() != SelC->getBitWidth())
+      if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
+          isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
         return ShrinkDemandedConstant(I, OpNo, DemandedMask);
 
       // If the constant is already the same as the ICmp, leave it as-is.
diff --git a/llvm/test/Transforms/InstCombine/select-imm-canon.ll b/llvm/test/Transforms/InstCombine/select-imm-canon.ll
index e230b3b92777..fec6d693954a 100644
--- a/llvm/test/Transforms/InstCombine/select-imm-canon.ll
+++ b/llvm/test/Transforms/InstCombine/select-imm-canon.ll
@@ -87,3 +87,41 @@ define i8 @original_logical(i32 %A, i32 %B) {
   %conv7 = trunc i32 %spec.select.i to i8
   ret i8 %conv7
 }
+
+; This would infinite loop because we have potentially opposing
+; constant transforms on degenerate (unsimplified) cmps.
+
+define i32 @PR49205(i32 %t0, i1 %b) {
+; CHECK-LABEL: @PR49205(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    br i1 [[B:%.*]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  br label %for.cond
+
+for.cond:
+  %s = phi i32 [ 7, %entry ], [ %add, %for.body ]
+  br i1 %b, label %for.body, label %for.end
+
+for.body:
+  %div = add i32 %t0, undef
+  %add = add nsw i32 %div, 1
+  br label %for.cond
+
+for.end:
+  %cmp6 = icmp ne i32 %s, 4
+  %conv = zext i1 %cmp6 to i32
+  %and7 = and i32 %s, %conv
+  %sub = sub i32 %s, %and7
+  %cmp9 = icmp ne i32 %sub, 4
+  %conv10 = zext i1 %cmp9 to i32
+  %sub11 = sub i32 %conv10, %sub
+  %and = and i32 %sub11, 1
+  ret i32 %and
+}

From f73ba0f3582ba33984ad996c124d106a9737cd90 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 27 Feb 2021 09:09:03 -0500
Subject: [PATCH 175/244] [SimplifyCFG] avoid illegal phi with both poison and
 undef

In the example based on:
https://llvm.org/PR49218
...we are crashing because poison is a subclass of undef, so we merge blocks and create:

PHI node has multiple entries for the same basic block with different incoming values!
  %k3 = phi i64 [ poison, %entry ], [ %k3, %g ], [ undef, %entry ]

If both poison and undef values are incoming, we soften the poison values to undef.

Differential Revision: https://reviews.llvm.org/D97495

(cherry picked from commit 356cdabd3a9e0ff919ea2c1a35c8706ecb915297)
---
 llvm/lib/Transforms/Utils/Local.cpp           |  24 ++-
 .../Transforms/SimplifyCFG/poison-merge.ll    | 200 ++++++++++++++++++
 2 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/poison-merge.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index d055f3dd3084..ae26058c210c 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -918,6 +918,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN,
 /// \param IncomingValues A map from block to value.
 static void replaceUndefValuesInPhi(PHINode *PN,
                                     const IncomingValueMap &IncomingValues) {
+  SmallVector<unsigned> TrueUndefOps;
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     Value *V = PN->getIncomingValue(i);
 
@@ -925,10 +926,31 @@ static void replaceUndefValuesInPhi(PHINode *PN,
 
     BasicBlock *BB = PN->getIncomingBlock(i);
     IncomingValueMap::const_iterator It = IncomingValues.find(BB);
-    if (It == IncomingValues.end()) continue;
 
+    // Keep track of undef/poison incoming values. Those must match, so we fix
+    // them up below if needed.
+    // Note: this is conservatively correct, but we could try harder and group
+    // the undef values per incoming basic block.
+    if (It == IncomingValues.end()) {
+      TrueUndefOps.push_back(i);
+      continue;
+    }
+
+    // There is a defined value for this incoming block, so map this undef
+    // incoming value to the defined value.
     PN->setIncomingValue(i, It->second);
   }
+
+  // If there are both undef and poison values incoming, then convert those
+  // values to undef. It is invalid to have different values for the same
+  // incoming block.
+  unsigned PoisonCount = count_if(TrueUndefOps, [&](unsigned i) {
+    return isa<PoisonValue>(PN->getIncomingValue(i));
+  });
+  if (PoisonCount != 0 && PoisonCount != TrueUndefOps.size()) {
+    for (unsigned i : TrueUndefOps)
+      PN->setIncomingValue(i, UndefValue::get(PN->getType()));
+  }
 }
 
 /// Replace a value flowing from a block to a phi with
diff --git a/llvm/test/Transforms/SimplifyCFG/poison-merge.ll b/llvm/test/Transforms/SimplifyCFG/poison-merge.ll
new file mode 100644
index 000000000000..93d9b0b299a6
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/poison-merge.ll
@@ -0,0 +1,200 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=0 < %s | FileCheck %s
+
+; Merge 2 undefined incoming values.
+
+define i32 @undef_merge(i32 %x) {
+; CHECK-LABEL: @undef_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 4, label [[G:%.*]]
+; CHECK-NEXT:    i32 12, label [[G]]
+; CHECK-NEXT:    ]
+; CHECK:       g:
+; CHECK-NEXT:    [[K3:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[G]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  switch i32 %x, label %exit [
+  i32 4, label %loop
+  i32 12, label %g
+  ]
+
+loop:
+  %k2 = phi i64 [ %k3, %g ], [ undef, %entry ]
+  br label %g
+
+g:
+  %k3 = phi i64 [ %k2, %loop ], [ undef, %entry ]
+  br label %loop
+
+exit:
+  ret i32 undef
+}
+
+; Merge 2 poison incoming values.
+
+define i32 @poison_merge(i32 %x) {
+; CHECK-LABEL: @poison_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 4, label [[G:%.*]]
+; CHECK-NEXT:    i32 12, label [[G]]
+; CHECK-NEXT:    ]
+; CHECK:       g:
+; CHECK-NEXT:    [[K3:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ poison, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[G]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  switch i32 %x, label %exit [
+  i32 4, label %loop
+  i32 12, label %g
+  ]
+
+loop:
+  %k2 = phi i64 [ %k3, %g ], [ poison, %entry ]
+  br label %g
+
+g:
+  %k3 = phi i64 [ %k2, %loop ], [ poison, %entry ]
+  br label %loop
+
+exit:
+  ret i32 undef
+}
+
+; Merge equal defined incoming values.
+
+define i32 @defined_merge(i32 %x) {
+; CHECK-LABEL: @defined_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 4, label [[G:%.*]]
+; CHECK-NEXT:    i32 12, label [[G]]
+; CHECK-NEXT:    ]
+; CHECK:       g:
+; CHECK-NEXT:    [[K3:%.*]] = phi i64 [ 42, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ 42, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[G]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  switch i32 %x, label %exit [
+  i32 4, label %loop
+  i32 12, label %g
+  ]
+
+loop:
+  %k2 = phi i64 [ %k3, %g ], [ 42, %entry ]
+  br label %g
+
+g:
+  %k3 = phi i64 [ %k2, %loop ], [ 42, %entry ]
+  br label %loop
+
+exit:
+  ret i32 undef
+}
+
+; Merge defined and undef incoming values.
+
+define i32 @defined_and_undef_merge(i32 %x) {
+; CHECK-LABEL: @defined_and_undef_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 4, label [[G:%.*]]
+; CHECK-NEXT:    i32 12, label [[G]]
+; CHECK-NEXT:    ]
+; CHECK:       g:
+; CHECK-NEXT:    [[K3:%.*]] = phi i64 [ 42, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ 42, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[G]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  switch i32 %x, label %exit [
+  i32 4, label %loop
+  i32 12, label %g
+  ]
+
+loop:
+  %k2 = phi i64 [ %k3, %g ], [ undef, %entry ]
+  br label %g
+
+g:
+  %k3 = phi i64 [ %k2, %loop ], [ 42, %entry ]
+  br label %loop
+
+exit:
+  ret i32 undef
+}
+
+; Merge defined and poison incoming values.
+
+define i32 @defined_and_poison_merge(i32 %x) {
+; CHECK-LABEL: @defined_and_poison_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 4, label [[G:%.*]]
+; CHECK-NEXT:    i32 12, label [[G]]
+; CHECK-NEXT:    ]
+; CHECK:       g:
+; CHECK-NEXT:    [[K3:%.*]] = phi i64 [ 42, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ 42, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[G]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  switch i32 %x, label %exit [
+  i32 4, label %loop
+  i32 12, label %g
+  ]
+
+loop:
+  %k2 = phi i64 [ %k3, %g ], [ poison, %entry ]
+  br label %g
+
+g:
+  %k3 = phi i64 [ %k2, %loop ], [ 42, %entry ]
+  br label %loop
+
+exit:
+  ret i32 undef
+}
+
+; Do not crash trying to merge poison and undef into a single phi.
+
+define i32 @PR49218(i32 %x) {
+; CHECK-LABEL: @PR49218(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[EXIT:%.*]] [
+; CHECK-NEXT:    i32 4, label [[G:%.*]]
+; CHECK-NEXT:    i32 12, label [[G]]
+; CHECK-NEXT:    ]
+; CHECK:       g:
+; CHECK-NEXT:    [[K3:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[K3]], [[G]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[G]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 undef
+;
+entry:
+  switch i32 %x, label %exit [
+  i32 4, label %loop
+  i32 12, label %g
+  ]
+
+loop:
+  %k2 = phi i64 [ %k3, %g ], [ undef, %entry ]
+  br label %g
+
+g:
+  %k3 = phi i64 [ %k2, %loop ], [ poison, %entry ]
+  br label %loop
+
+exit:
+  ret i32 undef
+}

From 344216979213d841b72e44891871c031db622f5d Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Mon, 1 Mar 2021 12:17:10 -0800
Subject: [PATCH 176/244] Revert "[c++20] Mark class type NTTPs as done and
 start defining the feature test macro."

Some of the parts of this work were reverted; stop defining the feature
test macro for now.

This reverts commit b4c63ef6dd90dba9af26a111c9a78b121c5284b1.

(cherry picked from commit 564f5b0734bd5d265a0046e5ca9d08ae5bc303eb)
---
 clang/lib/Frontend/InitPreprocessor.cpp | 2 +-
 clang/test/Lexer/cxx-features.cpp       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index d47ad1b74649..c64a912ce919 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -565,7 +565,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
     Builder.defineMacro("__cpp_aggregate_bases", "201603L");
     Builder.defineMacro("__cpp_structured_bindings", "201606L");
     Builder.defineMacro("__cpp_nontype_template_args",
-                        LangOpts.CPlusPlus20 ? "201911L" : "201411L");
+                        "201411L"); // (not latest)
     Builder.defineMacro("__cpp_fold_expressions", "201603L");
     Builder.defineMacro("__cpp_guaranteed_copy_elision", "201606L");
     Builder.defineMacro("__cpp_nontype_template_parameter_auto", "201606L");
diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp
index f57faed4ed90..2f46f354ee83 100644
--- a/clang/test/Lexer/cxx-features.cpp
+++ b/clang/test/Lexer/cxx-features.cpp
@@ -181,7 +181,8 @@
 #error "wrong value for __cpp_structured_bindings"
 #endif
 
-#if check(nontype_template_args, 0, 0, 0, 201411, 201911, 201911)
+#if check(nontype_template_args, 0, 0, 0, 201411, 201411, 201411)
+// FIXME: 201911 in C++20
 #error "wrong value for __cpp_nontype_template_args"
 #endif
 

From 9760b282ff03ef581d51b3d74d5b33d09b463272 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sun, 28 Feb 2021 11:23:46 -0800
Subject: [PATCH 177/244] [DAGCombiner][X86] Don't peek through ANDs on the
 shift amount in matchRotateSub when called from MatchFunnelPosNeg.

Peeking through AND is only valid if the input to both shifts is
the same. If the inputs are different, then the original pattern
ORs the two values when the masked shift amount is 0. This is ok
if the values are the same, since the OR would be a NOP, which is
why it's ok for rotate.

Fixes PR49365 and reverts PR34641

Differential Revision: https://reviews.llvm.org/D97637

(cherry picked from commit 5de09ef02e24d234d9fc0cd1c6dfe18a1bb784b0)
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 +++++--
 llvm/test/CodeGen/X86/shift-double.ll         | 44 ++++++++++++-------
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 89670d708264..6a6f83827f72 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6517,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
 // to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise if matching a general funnel shift, it should be clear.
 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
-                           SelectionDAG &DAG) {
+                           SelectionDAG &DAG, bool IsRotate) {
   // If EltSize is a power of 2 then:
   //
   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6550,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
   // always invokes undefined behavior for 32-bit X.
   //
   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+  //
+  // NOTE: We can only do this when matching a rotate and not a general
+  // funnel shift.
   unsigned MaskLoBits = 0;
-  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+  if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
       unsigned Bits = Log2_64(EltSize);
@@ -6641,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   //          (srl x, (*ext y))) ->
   //   (rotr x, y) or (rotl x, (sub 32, y))
   EVT VT = Shifted.getValueType();
-  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+                     /*IsRotate*/ true)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                        HasPos ? Pos : Neg);
@@ -6670,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
   // fold (or (shl x0, (*ext (sub 32, y))),
   //          (srl x1, (*ext y))) ->
   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
-  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                        HasPos ? Pos : Neg);
diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll
index c0872957f2b8..1213a80921d2 100644
--- a/llvm/test/CodeGen/X86/shift-double.ll
+++ b/llvm/test/CodeGen/X86/shift-double.ll
@@ -480,23 +480,31 @@ define i32 @test18(i32 %hi, i32 %lo, i32 %bits) nounwind {
   ret i32 %sh
 }
 
-; PR34641 - Masked Shift Counts
+; These are not valid shld/shrd patterns. When the shift amount modulo
+; the bitwidth is zero, the result should be an OR of both operands not a
+; shift.
 
-define i32 @shld_safe_i32(i32, i32, i32) {
-; X86-LABEL: shld_safe_i32:
+define i32 @not_shld_i32(i32, i32, i32) {
+; X86-LABEL: not_shld_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shld_safe_i32:
+; X64-LABEL: not_shld_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shll %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shldl %cl, %esi, %eax
+; X64-NEXT:    shrl %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = shl i32 %0, %4
@@ -507,21 +515,27 @@ define i32 @shld_safe_i32(i32, i32, i32) {
   ret i32 %9
 }
 
-define i32 @shrd_safe_i32(i32, i32, i32) {
-; X86-LABEL: shrd_safe_i32:
+define i32 @not_shrd_i32(i32, i32, i32) {
+; X86-LABEL: not_shrd_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shrd_safe_i32:
+; X64-LABEL: not_shrd_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrdl %cl, %esi, %eax
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = lshr i32 %0, %4

From 4ed9f17e9390a6845cfd8a235f2078cb9b0e4719 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirst=C3=B3f=20Umann?= <dkszelethus@gmail.com>
Date: Fri, 5 Feb 2021 19:57:09 +0100
Subject: [PATCH 178/244] [analyzer] Add 12.0.0 release notes

Differential Revision: https://reviews.llvm.org/D96163
---
 clang/docs/ReleaseNotes.rst      | 33 +++++++++++++++++++++++++++++++-
 clang/docs/analyzer/checkers.rst |  2 ++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 64f737ff488f..7f4b675b68f9 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -377,7 +377,38 @@ libclang
 Static Analyzer
 ---------------
 
-- ...
+.. 3ff220de9009 [analyzer][StdLibraryFunctionsChecker] Add POSIX networking functions
+.. ...And a million other patches.
+- Improve the analyzer's understanding of several POSIX functions.
+
+.. https://reviews.llvm.org/D86533#2238207
+- Greatly improved the analyzer’s constraint solver by better understanding
+  when constraints are imposed on multiple symbolic values that are known to be
+  equal or known to be non-equal. It will now also efficiently reject impossible
+  if-branches between known comparison expressions. (Incorrectly stated as an
+  11.0.0 feature in the previous release notes)
+
+.. 820e8d8656ec [Analyzer][WebKit] UncountedLambdaCaptureChecker
+- New checker: :ref:`webkit.UncountedLambdaCapturesChecker<webkit-UncountedLambdaCapturesChecker>`
+  is a WebKit coding convention checker that flags raw pointers to
+  reference-counted objects captured by lambdas and suggests using intrusive
+  reference-counting smart pointers instead.
+
+.. 8a64689e264c [Analyzer][WebKit] UncountedLocalVarsChecker
+- New checker: :ref:`alpha.webkit.UncountedLocalVarsChecker<alpha-webkit-UncountedLocalVarsChecker>`
+  is a WebKit coding convention checker that intends to make sure that any
+  uncounted local variable is backed by a ref-counted object with lifetime that
+  is strictly larger than the scope of the uncounted local variable.
+
+.. 914f6c4ff8a4 [StaticAnalyzer] Support struct annotations in FuchsiaHandleChecker
+- ``fuchsia.HandleChecker`` now recognizes handles in structs; all the handles
+  referenced by the structure (direct value or ptr) would be treated as
+  containing the release/use/acquire annotations directly.
+
+.. 8deaec122ec6 [analyzer] Update Fuchsia checker to catch releasing unowned handles.
+- Fuchsia checkers can detect the release of an unowned handle.
+
+- Numerous fixes and improvements to bug report generation.
 
 .. _release-notes-ubsan:
 
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index b47be97eef96..d851845396ac 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2538,6 +2538,8 @@ We also define a set of safe transformations which if passed a safe value as an
 - casts
 - unary operators like ``&`` or ``*``
 
+.. _alpha-webkit-UncountedLocalVarsChecker:
+
 alpha.webkit.UncountedLocalVarsChecker
 """"""""""""""""""""""""""""""""""""""
 The goal of this rule is to make sure that any uncounted local variable is backed by a ref-counted object with lifetime that is strictly larger than the scope of the uncounted local variable. To be on the safe side we require the scope of an uncounted variable to be embedded in the scope of ref-counted object that backs it.

From 99350dcc3f5b46d564338c0067c2cbd139b841ee Mon Sep 17 00:00:00 2001
From: "Peyton, Jonathan L" <jonathan.l.peyton@intel.com>
Date: Tue, 2 Mar 2021 07:44:15 -0600
Subject: [PATCH 179/244] [OpenMP] Fix clang-cl build error regarding TSX
 intrinsics

Fix for https://bugs.llvm.org/show_bug.cgi?id=49339

The CMake check for the RTM intrinsics needs the -mrtm flag to be set
during the test. This way clang-cl correctly detects it has the
_xbegin() intrinsic. Otherwise, the CMake check fails.

Differential Revision: https://reviews.llvm.org/D97413

(cherry picked from commit e83380fccc2cc9842bdcfd268efddf6fce90544d)
---
 openmp/runtime/cmake/config-ix.cmake | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake
index f06fda6c0221..ed62aefccd14 100644
--- a/openmp/runtime/cmake/config-ix.cmake
+++ b/openmp/runtime/cmake/config-ix.cmake
@@ -172,6 +172,10 @@ if (IA32 OR INTEL64)
       }
       int main() { int a = __kmp_umwait(0, 1000); return a; }")
   check_cxx_source_compiles("${source_code}" LIBOMP_HAVE_WAITPKG_INTRINSICS)
+  set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+  if (LIBOMP_HAVE_MRTM_FLAG)
+    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -mrtm")
+  endif()
   set(source_code "// check for attribute rtm and rtm intrinsics
       #ifdef IMMINTRIN_H
       #include <immintrin.h>
@@ -188,6 +192,7 @@ if (IA32 OR INTEL64)
       int main() { int a = __kmp_xbegin(); return a; }")
   check_cxx_source_compiles("${source_code}" LIBOMP_HAVE_RTM_INTRINSICS)
   set(CMAKE_REQUIRED_DEFINITIONS)
+  set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
 endif()
 
 # Find perl executable

From 52510d84802b55ecd80a904ca259adfecffc5be1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Mon, 1 Mar 2021 21:37:26 +0100
Subject: [PATCH 180/244] [GlobalISel] Bail on G_PHI narrowing of odd types
 (PR48188)

The current narrowing code for G_PHI can only handle the case
where the size is a multiple of the narrow size. If this is not
the case, fall back to SDAG instead of asserting.

Original patch by shepmaster.

Differential Revision: https://reviews.llvm.org/D92446

(cherry picked from commit c35761db0f078f74550ef56bfc0745c162d76967)
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  5 ++++
 llvm/test/CodeGen/AArch64/pr48188.ll          | 27 +++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/pr48188.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e7f40523efaf..3178ee16af2b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1063,6 +1063,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     Observer.changedInstr(MI);
     return Legalized;
   case TargetOpcode::G_PHI: {
+    // FIXME: add support for when SizeOp0 isn't an exact multiple of
+    // NarrowSize.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+
     unsigned NumParts = SizeOp0 / NarrowSize;
     SmallVector<Register, 2> DstRegs(NumParts);
     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
diff --git a/llvm/test/CodeGen/AArch64/pr48188.ll b/llvm/test/CodeGen/AArch64/pr48188.ll
new file mode 100644
index 000000000000..2da02e640ec1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr48188.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; GlobalISel cannot legalize this phi, so we fall back to SDAG.
+define void @test() nounwind {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16 // =16
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    str x1, [sp] // 8-byte Folded Spill
+; CHECK-NEXT:    str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr x0, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr x1, [sp] // 8-byte Folded Reload
+; CHECK-NEXT:    str x1, [sp] // 8-byte Folded Spill
+; CHECK-NEXT:    str x0, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    b .LBB0_1
+entry:
+  br label %loop
+
+loop:
+  %p = phi i72 [ 0, %entry ], [ %p, %loop ]
+  br label %loop
+}

From d24e102ba2665dc6cd467f467813fba9c8261133 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Wed, 24 Feb 2021 12:37:22 -0500
Subject: [PATCH 181/244] [OpenMP] Fixed a crash when offloading to x86_64 with
 target nowait

PR#49334 reports a crash when offloading to x86_64 with `target nowait`,
which is caused by referencing a nullptr. The root cause of the issue is, when
pushing a hidden helper task in `__kmp_push_task`, it also maps the gtid to its
shadow gtid, which is wrong.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D97329

(cherry picked from commit e5da63d5a9ede1fb6d8aa18cfd44533ead128738)
---
 .../libomptarget/test/offloading/bug49334.cpp | 148 ++++++++++++++++++
 openmp/runtime/src/kmp_tasking.cpp            |   3 +-
 2 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 openmp/libomptarget/test/offloading/bug49334.cpp

diff --git a/openmp/libomptarget/test/offloading/bug49334.cpp b/openmp/libomptarget/test/offloading/bug49334.cpp
new file mode 100644
index 000000000000..b26cd7b2b338
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/bug49334.cpp
@@ -0,0 +1,148 @@
+// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda
+
+#include <cassert>
+#include <iostream>
+#include <memory>
+#include <vector>
+
+class BlockMatrix {
+private:
+  const int rowsPerBlock;
+  const int colsPerBlock;
+  const long nRows;
+  const long nCols;
+  const int nBlocksPerRow;
+  const int nBlocksPerCol;
+  std::vector<std::vector<std::unique_ptr<float[]>>> Blocks;
+
+public:
+  BlockMatrix(const int _rowsPerBlock, const int _colsPerBlock,
+              const long _nRows, const long _nCols)
+      : rowsPerBlock(_rowsPerBlock), colsPerBlock(_colsPerBlock), nRows(_nRows),
+        nCols(_nCols), nBlocksPerRow(_nRows / _rowsPerBlock),
+        nBlocksPerCol(_nCols / _colsPerBlock), Blocks(nBlocksPerCol) {
+    for (int i = 0; i < nBlocksPerCol; i++) {
+      for (int j = 0; j < nBlocksPerRow; j++) {
+        Blocks[i].emplace_back(new float[_rowsPerBlock * _colsPerBlock]);
+      }
+    }
+  };
+
+  // Initialize the BlockMatrix from a flattened 2D array
+  void Initialize(const std::vector<float> &matrix) {
+    for (int i = 0; i < nBlocksPerCol; i++)
+      for (int j = 0; j < nBlocksPerRow; j++) {
+        float *CurrBlock = GetBlock(i, j);
+        for (int ii = 0; ii < colsPerBlock; ++ii)
+          for (int jj = 0; jj < rowsPerBlock; ++jj) {
+            int curri = i * colsPerBlock + ii;
+            int currj = j * rowsPerBlock + jj;
+            CurrBlock[ii + jj * colsPerBlock] = matrix[curri + currj * nCols];
+          }
+      }
+  }
+
+  long Compare(const std::vector<float> &matrix) const {
+    long fail = 0;
+    for (int i = 0; i < nBlocksPerCol; i++)
+      for (int j = 0; j < nBlocksPerRow; j++) {
+        float *CurrBlock = GetBlock(i, j);
+        for (int ii = 0; ii < colsPerBlock; ++ii)
+          for (int jj = 0; jj < rowsPerBlock; ++jj) {
+            int curri = i * colsPerBlock + ii;
+            int currj = j * rowsPerBlock + jj;
+            float m_value = matrix[curri + currj * nCols];
+            float bm_value = CurrBlock[ii + jj * colsPerBlock];
+            if (bm_value != m_value) {
+              fail++;
+            }
+          }
+      }
+    return fail;
+  }
+
+  float *GetBlock(int i, int j) const {
+    assert(i < nBlocksPerCol && j < nBlocksPerRow && "Accessing outside block");
+    return Blocks[i][j].get();
+  }
+};
+
+constexpr const int BS = 256;
+constexpr const int N = 1024;
+
+int BlockMatMul_TargetNowait(BlockMatrix &A, BlockMatrix &B, BlockMatrix &C) {
+#pragma omp parallel
+#pragma omp master
+  for (int i = 0; i < N / BS; ++i)
+    for (int j = 0; j < N / BS; ++j) {
+      float *BlockC = C.GetBlock(i, j);
+      for (int k = 0; k < N / BS; ++k) {
+        float *BlockA = A.GetBlock(i, k);
+        float *BlockB = B.GetBlock(k, j);
+// clang-format off
+#pragma omp target depend(in: BlockA[0], BlockB[0]) depend(inout: BlockC[0])   \
+            map(to: BlockA[:BS * BS], BlockB[:BS * BS])                        \
+            map(tofrom: BlockC[:BS * BS]) nowait
+// clang-format on
+#pragma omp parallel for
+        for (int ii = 0; ii < BS; ii++)
+          for (int jj = 0; jj < BS; jj++) {
+            for (int kk = 0; kk < BS; ++kk)
+              BlockC[ii + jj * BS] +=
+                  BlockA[ii + kk * BS] * BlockB[kk + jj * BS];
+          }
+      }
+    }
+  return 0;
+}
+
+void Matmul(const std::vector<float> &a, const std::vector<float> &b,
+            std::vector<float> &c) {
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      float sum = 0.0;
+      for (int k = 0; k < N; ++k) {
+        sum = sum + a[i * N + k] * b[k * N + j];
+      }
+      c[i * N + j] = sum;
+    }
+  }
+}
+
+int main(int argc, char *argv[]) {
+  std::vector<float> a(N * N);
+  std::vector<float> b(N * N);
+  std::vector<float> c(N * N, 0.0);
+
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      a[i * N + j] = b[i * N + j] = i + j % 100;
+    }
+  }
+
+  auto BlockedA = BlockMatrix(BS, BS, N, N);
+  BlockedA.Initialize(a);
+  BlockedA.Compare(a);
+  auto BlockedB = BlockMatrix(BS, BS, N, N);
+  BlockedB.Initialize(b);
+  BlockedB.Compare(b);
+
+  Matmul(a, b, c);
+
+  auto BlockedC = BlockMatrix(BS, BS, N, N);
+  BlockMatMul_TargetNowait(BlockedA, BlockedB, BlockedC);
+
+  if (BlockedC.Compare(c) > 0) {
+    return 1;
+  }
+
+  std::cout << "PASS\n";
+
+  return 0;
+}
+
+// CHECK: PASS
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 3d7021128dbd..4bcd11946694 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -326,7 +326,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
 
-  if (taskdata->td_flags.hidden_helper) {
+  // We don't need to map to shadow gtid if it is already a hidden helper thread
+  if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) {
     gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
     thread = __kmp_threads[gtid];
   }

From 46a1b0655666e21c56fa79560e9baee87405d4f4 Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Fri, 5 Mar 2021 16:01:45 +0100
Subject: [PATCH 182/244] [AArch64] Legalize horizontal fmax/fmin reductions on
 f16 vectors

Expand the horizontal reduction during the instruction selection phase, but only if the target doesn't support the full fp16 instruction set.

Fixes https://bugs.llvm.org/show_bug.cgi?id=49401

Reviewed By: aemerson

Differential Revision: https://reviews.llvm.org/D97840

(cherry picked from commit 8725b24c6d4abaa97425e704652a13dacb35fe3f)
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  7 ++-
 .../AArch64/vecreduce-fmax-legalization.ll    | 62 ++++++++++++++++++-
 .../AArch64/vecreduce-fmin-legalization.ll    | 62 ++++++++++++++++++-
 3 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1be09186dc0a..1451151f4dc5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1017,11 +1017,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     // Vector reductions
     for (MVT VT : { MVT::v4f16, MVT::v2f32,
                     MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
-      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
-      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
+        setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+        setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
 
-      if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())
         setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
+      }
     }
     for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
                     MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index f1ebd8fa85ea..d26db2aefee0 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
 
 declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
 declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
 declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
 
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
 declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
 declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
@@ -44,6 +46,64 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
   ret fp128 %b
 }
 
+define half @test_v4f16(<4 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v4f16:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-NEXT:    mov h3, v0.h[1]
+; CHECK-NOFP-NEXT:    mov h1, v0.h[3]
+; CHECK-NOFP-NEXT:    mov h2, v0.h[2]
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v4f16:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    fmaxnmv h0, v0.4h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
+  ret half %b
+}
+
+define half @test_v4f16_ninf(<4 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v4f16_ninf:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-NEXT:    mov h3, v0.h[1]
+; CHECK-NOFP-NEXT:    mov h1, v0.h[3]
+; CHECK-NOFP-NEXT:    mov h2, v0.h[2]
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v4f16_ninf:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    fmaxnmv h0, v0.4h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
+  ret half %b
+}
+
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 4129fa80b13e..52d6e9773ab2 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
 
 declare half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a)
 declare float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
 declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
 declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a)
 
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
 declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
 declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
 declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
@@ -44,6 +46,64 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
   ret fp128 %b
 }
 
+define half @test_v4f16(<4 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v4f16:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-NEXT:    mov h3, v0.h[1]
+; CHECK-NOFP-NEXT:    mov h1, v0.h[3]
+; CHECK-NOFP-NEXT:    mov h2, v0.h[2]
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v4f16:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    fminnmv h0, v0.4h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
+  ret half %b
+}
+
+define half @test_v4f16_ninf(<4 x half> %a) nounwind {
+; CHECK-NOFP-LABEL: test_v4f16_ninf:
+; CHECK-NOFP:       // %bb.0:
+; CHECK-NOFP-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-NEXT:    mov h3, v0.h[1]
+; CHECK-NOFP-NEXT:    mov h1, v0.h[3]
+; CHECK-NOFP-NEXT:    mov h2, v0.h[2]
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s3, h3
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s3
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s2, h2
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s2
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    fcvt s0, h0
+; CHECK-NOFP-NEXT:    fcvt s1, h1
+; CHECK-NOFP-NEXT:    fminnm s0, s0, s1
+; CHECK-NOFP-NEXT:    fcvt h0, s0
+; CHECK-NOFP-NEXT:    ret
+;
+; CHECK-FP-LABEL: test_v4f16_ninf:
+; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    fminnmv h0, v0.4h
+; CHECK-FP-NEXT:    ret
+  %b = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
+  ret half %b
+}
+
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK:       // %bb.0:

From f8b32989241cca87a8690c8cc404f06ce1f90e4c Mon Sep 17 00:00:00 2001
From: Nathan James <n.james93@hotmail.co.uk>
Date: Wed, 3 Mar 2021 16:01:12 +0000
Subject: [PATCH 183/244] [clang-tidy] Deprecate readability-deleted-default
 check

... For removal in next release cycle.
The clang warning that does the same thing is enabled by default and typically emits better diagnostics, making this check surplus to requirements.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D97491

(cherry picked from commit 19aefd2d5dc3a8d3b8e81219973828170b7fcd2c)
---
 clang-tools-extra/docs/ReleaseNotes.rst       | 10 ++++++++++
 .../checks/readability-deleted-default.rst    | 20 +++----------------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index b3c9c829198b..29321bb3eb04 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -358,6 +358,16 @@ Changes in existing checks
 
   Added `std::basic_string_view` to default list of ``string``-like types.
 
+Deprecated checks
+^^^^^^^^^^^^^^^^^
+
+- The :doc:`readability-deleted-default
+  <clang-tidy/checks/readability-deleted-default>` check has been deprecated.
+  
+  The clang warning `Wdefaulted-function-deleted
+  <https://clang.llvm.org/docs/DiagnosticsReference.html#wdefaulted-function-deleted>`_
+  will diagnose the same issues and is enabled by default.
+
 Improvements to include-fixer
 -----------------------------
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst
index 00134eb05484..5f2083e00061 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability-deleted-default.rst
@@ -3,20 +3,6 @@
 readability-deleted-default
 ===========================
 
-Checks that constructors and assignment operators marked as ``= default`` are
-not actually deleted by the compiler.
-
-.. code-block:: c++
-
-  class Example {
-  public:
-    // This constructor is deleted because I is missing a default value.
-    Example() = default;
-    // This is fine.
-    Example(const Example& Other) = default;
-    // This operator is deleted because I cannot be assigned (it is const).
-    Example& operator=(const Example& Other) = default;
-
-  private:
-    const int I;
-  };
+This check has been deprecated; prefer to use the `Wdefaulted-function-deleted
+<https://clang.llvm.org/docs/DiagnosticsReference.html#wdefaulted-function-deleted>`_
+flag instead.

From a123beacce408af8c2de1f39d522ac6b6c4b5d1b Mon Sep 17 00:00:00 2001
From: Juneyoung Lee <aqjune@gmail.com>
Date: Tue, 9 Feb 2021 14:06:17 +0900
Subject: [PATCH 184/244] [LoopVectorize] Fix VPRecipeBuilder::createEdgeMask
 to correctly generate the mask

This patch fixes pr48832 by correctly generating the mask when a poison value is involved.

Consider this CFG (which is a part of the input):

```
for.body:                                         ; preds = %for.cond
  br i1 true, label %cond.false, label %land.rhs

land.rhs:                                         ; preds = %for.body
  br i1 poison, label %cond.end, label %cond.false

cond.false:                                       ; preds = %for.body, %land.rhs
  br label %cond.end

cond.end:                                         ; preds = %land.rhs, %cond.false
  %cond = phi i32 [ 0, %cond.false ], [ 1, %land.rhs ]

```

The path for.body -> land.rhs -> cond.end should be taken when 'select i1 false, i1 poison, i1 false' holds (which means it's never taken); but VPRecipeBuilder::createEdgeMask was emitting 'and i1 false, poison' instead.
The former one successfully blocks poison propagation whereas the latter one doesn't, making the condition poison and thus causing the miscompilation.

SimplifyCFG has a similar bug (which didn't expose a real-world bug yet), and a patch for this is also ongoing (see https://reviews.llvm.org/D95026).

Reviewed By: bjope

Differential Revision: https://reviews.llvm.org/D95217

(cherry picked from commit ed253ef77248d91a15b3a1aa36c0b74bed8ec8af)
---
 .../Vectorize/LoopVectorizationPlanner.h      |  4 ++
 .../Transforms/Vectorize/LoopVectorize.cpp    | 11 ++++-
 .../LoopVectorize/X86/masked_load_store.ll    | 48 +++++++++----------
 .../x86-interleaved-accesses-masked-group.ll  | 12 ++---
 .../LoopVectorize/if-conversion-nest.ll       |  2 +-
 .../LoopVectorize/if-pred-non-void.ll         |  2 +-
 .../Transforms/LoopVectorize/if-reduction.ll  |  8 ++--
 llvm/test/Transforms/LoopVectorize/pr48832.ll | 40 ++++++++++++++++
 .../LoopVectorize/reduction-inloop-pred.ll    |  4 +-
 .../LoopVectorize/reduction-inloop.ll         |  4 +-
 10 files changed, 93 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/pr48832.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 1795470fa58c..19797e6f7858 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -142,6 +142,10 @@ class VPBuilder {
     return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
   }
 
+  VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
+    return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
+  }
+
   //===--------------------------------------------------------------------===//
   // RAII helpers.
   //===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 47635dbdda02..d36e078444bc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8195,8 +8195,15 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
   if (BI->getSuccessor(0) != Dst)
     EdgeMask = Builder.createNot(EdgeMask);
 
-  if (SrcMask) // Otherwise block in-mask is all-one, no need to AND.
-    EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
+  if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
+    // The condition is 'SrcMask && EdgeMask', which is equivalent to
+    // 'select i1 SrcMask, i1 EdgeMask, i1 false'.
+    // The select version does not introduce new UB if SrcMask is false and
+    // EdgeMask is poison. Using 'and' here introduces undefined behavior.
+    VPValue *False = Plan->getOrAddVPValue(
+        ConstantInt::getFalse(BI->getCondition()->getType()));
+    EdgeMask = Builder.createSelect(SrcMask, EdgeMask, False);
+  }
 
   return EdgeMaskCache[Edge] = EdgeMask;
 }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index dddedcb77f67..b464389fe393 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -2042,10 +2042,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
 ; AVX1-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX1-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX1-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
-; AVX1-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
-; AVX1-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
-; AVX1-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
-; AVX1-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX1-NEXT:    [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
+; AVX1-NEXT:    [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
+; AVX1-NEXT:    [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
+; AVX1-NEXT:    [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
 ; AVX1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
 ; AVX1-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
 ; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2166,10 +2166,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
 ; AVX2-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX2-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX2-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
-; AVX2-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
-; AVX2-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
-; AVX2-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
-; AVX2-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX2-NEXT:    [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
+; AVX2-NEXT:    [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
+; AVX2-NEXT:    [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
+; AVX2-NEXT:    [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
 ; AVX2-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
 ; AVX2-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
 ; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2290,10 +2290,10 @@ define void @foo7(double* noalias nocapture %out, double** noalias nocapture rea
 ; AVX512-NEXT:    [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ; AVX512-NEXT:    [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ; AVX512-NEXT:    [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-; AVX512-NEXT:    [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]]
-; AVX512-NEXT:    [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]]
-; AVX512-NEXT:    [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP30]]
-; AVX512-NEXT:    [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]]
+; AVX512-NEXT:    [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer
+; AVX512-NEXT:    [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer
+; AVX512-NEXT:    [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer
+; AVX512-NEXT:    [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer
 ; AVX512-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
 ; AVX512-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>*
 ; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]])
@@ -2459,10 +2459,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
 ; AVX1-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX1-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX1-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
-; AVX1-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
-; AVX1-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
-; AVX1-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
-; AVX1-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX1-NEXT:    [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
+; AVX1-NEXT:    [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
+; AVX1-NEXT:    [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
+; AVX1-NEXT:    [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
 ; AVX1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
 ; AVX1-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
 ; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2583,10 +2583,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
 ; AVX2-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX2-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX2-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
-; AVX2-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
-; AVX2-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
-; AVX2-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
-; AVX2-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX2-NEXT:    [[TMP52:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
+; AVX2-NEXT:    [[TMP53:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP49]], <4 x i1> zeroinitializer
+; AVX2-NEXT:    [[TMP54:%.*]] = select <4 x i1> [[TMP30]], <4 x i1> [[TMP50]], <4 x i1> zeroinitializer
+; AVX2-NEXT:    [[TMP55:%.*]] = select <4 x i1> [[TMP31]], <4 x i1> [[TMP51]], <4 x i1> zeroinitializer
 ; AVX2-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
 ; AVX2-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
 ; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
@@ -2707,10 +2707,10 @@ define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture rea
 ; AVX512-NEXT:    [[TMP49:%.*]] = xor <8 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ; AVX512-NEXT:    [[TMP50:%.*]] = xor <8 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
 ; AVX512-NEXT:    [[TMP51:%.*]] = xor <8 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-; AVX512-NEXT:    [[TMP52:%.*]] = and <8 x i1> [[TMP48]], [[TMP28]]
-; AVX512-NEXT:    [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP29]]
-; AVX512-NEXT:    [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP30]]
-; AVX512-NEXT:    [[TMP55:%.*]] = and <8 x i1> [[TMP51]], [[TMP31]]
+; AVX512-NEXT:    [[TMP52:%.*]] = select <8 x i1> [[TMP28]], <8 x i1> [[TMP48]], <8 x i1> zeroinitializer
+; AVX512-NEXT:    [[TMP53:%.*]] = select <8 x i1> [[TMP29]], <8 x i1> [[TMP49]], <8 x i1> zeroinitializer
+; AVX512-NEXT:    [[TMP54:%.*]] = select <8 x i1> [[TMP30]], <8 x i1> [[TMP50]], <8 x i1> zeroinitializer
+; AVX512-NEXT:    [[TMP55:%.*]] = select <8 x i1> [[TMP31]], <8 x i1> [[TMP51]], <8 x i1> zeroinitializer
 ; AVX512-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
 ; AVX512-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <8 x double>*
 ; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <8 x double>* [[TMP57]], i32 8, <8 x i1> [[TMP52]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index 285f460d99d5..aa8b1361fe4e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -408,7 +408,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -520,7 +520,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(i8* noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
@@ -615,7 +615,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -727,7 +727,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(i8* noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nsw i32 [[INDEX]], 3
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <24 x i8>*
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <24 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = and <24 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false>
@@ -1535,7 +1535,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -1871,7 +1871,7 @@ define dso_local void @masked_strided2_unknown_tc(i8* noalias nocapture readonly
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw i32 [[INDEX]], 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i32 [[TMP2]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP0]], [[TMP1]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = and <8 x i1> [[TMP1]], [[TMP0]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP3]] to <16 x i8>*
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* [[TMP5]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 0cba3fc20ed9..f218869c1fbe 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -39,7 +39,7 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19>
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i1> [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i1> [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP16:%.*]] = and <4 x i1> [[TMP10]], [[TMP15]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
index 308377f06856..b8d9b458aa4c 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
@@ -161,7 +161,7 @@ for.cond.cleanup:                                 ; preds = %if.end
 ; CHECK: %[[CMP1:.+]] = icmp slt <2 x i32> %[[VAL:.+]], <i32 100, i32 100>
 ; CHECK: %[[CMP2:.+]] = icmp sge <2 x i32> %[[VAL]], <i32 200, i32 200>
 ; CHECK: %[[NOT:.+]] = xor <2 x i1> %[[CMP1]], <i1 true, i1 true>
-; CHECK: %[[AND:.+]] = and <2 x i1> %[[CMP2]], %[[NOT]]
+; CHECK: %[[AND:.+]] = select <2 x i1> %[[NOT]], <2 x i1> %[[CMP2]], <2 x i1> zeroinitializer
 ; CHECK: %[[OR:.+]] = or <2 x i1> %[[AND]], %[[CMP1]]
 ; CHECK: %[[EXTRACT:.+]] = extractelement <2 x i1> %[[OR]], i32 0
 ; CHECK: br i1 %[[EXTRACT]], label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index a97301659cb9..bde4fbcc9d13 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -610,9 +610,9 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], <float 3.000000e+00,
 ; CHECK-DAG: %[[M2:.*]] = fmul fast <4 x float> %[[V0]], <float 2.000000e+00,
 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
-; CHECK-DAG: %[[C12:.*]] = and <4 x i1> %[[C2]], %[[C11]]
+; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
-; CHECK: %[[C22:.*]] = and <4 x i1> %[[C21]], %[[C11]]
+; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]]
 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]]
 ; CHECK: fadd fast <4 x float> %[[S2]],
@@ -678,9 +678,9 @@ for.end:                                          ; preds = %for.inc, %entry
 ; CHECK-DAG: %[[SUB:.*]] = fsub fast <4 x float>
 ; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float>
 ; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
-; CHECK-DAG: %[[C12:.*]] = and <4 x i1> %[[C2]], %[[C11]]
+; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer
 ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
-; CHECK: %[[C22:.*]] = and <4 x i1> %[[C21]], %[[C11]]
+; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer
 ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]]
 ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]]
 define float @fcmp_fadd_fsub(float* nocapture readonly %a, i32 %n) nounwind readonly {
diff --git a/llvm/test/Transforms/LoopVectorize/pr48832.ll b/llvm/test/Transforms/LoopVectorize/pr48832.ll
new file mode 100644
index 000000000000..620da918bb47
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr48832.ll
@@ -0,0 +1,40 @@
+; RUN: opt -loop-vectorize -force-vector-width=4 -S -o - < %s | FileCheck %s
+%arrayt = type [64 x i32]
+
+@v_146 = external global %arrayt, align 1
+
+; Since the program has well defined behavior, it should not introduce store poison
+; CHECK: vector.ph:
+; CHECK-NEXT: br label %vector.body
+; CHECK: vector.body:
+; CHECK: store <4 x i32> zeroinitializer,
+; CHECK: br i1 %{{.*}}, label %middle.block, label %vector.body
+
+define void @foo() {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %cond.end, %entry
+  %storemerge = phi i16 [ 0, %entry ], [ %inc, %cond.end ]
+  %cmp = icmp slt i16 %storemerge, 15
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  br i1 true, label %cond.false, label %land.rhs
+
+land.rhs:                                         ; preds = %for.body
+  br i1 poison, label %cond.end, label %cond.false
+
+cond.false:                                       ; preds = %for.body, %land.rhs
+  br label %cond.end
+
+cond.end:                                         ; preds = %land.rhs, %cond.false
+  %cond = phi i32 [ 0, %cond.false ], [ 1, %land.rhs ]
+  %arrayidx = getelementptr inbounds %arrayt, %arrayt* @v_146, i16 0, i16 %storemerge
+  store i32 %cond, i32* %arrayidx, align 1
+  %inc = add nsw i16 %storemerge, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index d1b99e4e403b..e8271b9c5984 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -1542,8 +1542,8 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 23bfc39bf646..b295090ca928 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -814,8 +814,8 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>

From bff59aca162ef16d7634dc9df39f1f3af31ecb93 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 4 Mar 2021 22:30:38 -0800
Subject: [PATCH 185/244] [TargetLowering] Use HandleSDNodes to prevent nodes
 from being deleted by recursive calls in getNegatedExpression.

For binary or ternary ops we call getNegatedExpression multiple
times and then compare costs. While we're doing this we need to
hold a node from the first call across the second call, but its
not yet attached to the DAG. Its possible the second call creates
an identical node and then decides it didn't need it so will try
to delete it if it has no uses. This can cause a reference to the
node we're holding further up the call stack to become invalidated.

To prevent this, we can use a HandleSDNode to artificially give
the node a use without connecting it to the DAG.

I've used a std::list of HandleSDNodes so we can create handles
only when we have a node to hold. HandleSDNode does not have
a default constructor and cannot be copied or moved.

Fixes PR49393.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D97914

(cherry picked from commit 74e6030bcbcc8e628f9a99a424342a0c656456f9)
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 29 ++++++++++
 llvm/test/CodeGen/X86/pr49393.ll              | 55 +++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr49393.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7145fc91d5f3..b0ad86899d25 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5935,6 +5935,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
 
   SDLoc DL(Op);
 
+  // Because getNegatedExpression can delete nodes we need a handle to keep
+  // temporary nodes alive in case the recursion manages to create an identical
+  // node.
+  std::list<HandleSDNode> Handles;
+
   switch (Opcode) {
   case ISD::ConstantFP: {
     // Don't invert constant FP values after legalization unless the target says
@@ -6003,11 +6008,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     NegatibleCost CostX = NegatibleCost::Expensive;
     SDValue NegX =
         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+    // Prevent this node from being deleted by the next call.
+    if (NegX)
+      Handles.emplace_back(NegX);
+
     // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
     NegatibleCost CostY = NegatibleCost::Expensive;
     SDValue NegY =
         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 
+    // We're done with the handles.
+    Handles.clear();
+
     // Negate the X if its cost is less or equal than Y.
     if (NegX && (CostX <= CostY)) {
       Cost = CostX;
@@ -6052,11 +6064,18 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     NegatibleCost CostX = NegatibleCost::Expensive;
     SDValue NegX =
         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+    // Prevent this node from being deleted by the next call.
+    if (NegX)
+      Handles.emplace_back(NegX);
+
     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
     NegatibleCost CostY = NegatibleCost::Expensive;
     SDValue NegY =
         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 
+    // We're done with the handles.
+    Handles.clear();
+
     // Negate the X if its cost is less or equal than Y.
     if (NegX && (CostX <= CostY)) {
       Cost = CostX;
@@ -6094,15 +6113,25 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
     if (!NegZ)
       break;
 
+    // Prevent this node from being deleted by the next two calls.
+    Handles.emplace_back(NegZ);
+
     // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
     NegatibleCost CostX = NegatibleCost::Expensive;
     SDValue NegX =
         getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
+    // Prevent this node from being deleted by the next call.
+    if (NegX)
+      Handles.emplace_back(NegX);
+
     // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
     NegatibleCost CostY = NegatibleCost::Expensive;
     SDValue NegY =
         getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 
+    // We're done with the handles.
+    Handles.clear();
+
     // Negate the X if its cost is less or equal than Y.
     if (NegX && (CostX <= CostY)) {
       Cost = std::min(CostX, CostZ);
diff --git a/llvm/test/CodeGen/X86/pr49393.ll b/llvm/test/CodeGen/X86/pr49393.ll
new file mode 100644
index 000000000000..9952b90fc7b7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr49393.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @f() {
+; CHECK-LABEL: f:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %for.cond
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    imull %eax, %eax
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movapd %xmm0, %xmm1
+; CHECK-NEXT:    mulsd %xmm0, %xmm1
+; CHECK-NEXT:    subsd %xmm0, %xmm1
+; CHECK-NEXT:    cwtl
+; CHECK-NEXT:    xorps %xmm2, %xmm2
+; CHECK-NEXT:    cvtsi2sd %eax, %xmm2
+; CHECK-NEXT:    mulsd %xmm0, %xmm2
+; CHECK-NEXT:    mulsd %xmm0, %xmm2
+; CHECK-NEXT:    movapd %xmm2, %xmm3
+; CHECK-NEXT:    mulsd %xmm1, %xmm3
+; CHECK-NEXT:    mulsd %xmm0, %xmm2
+; CHECK-NEXT:    subsd %xmm3, %xmm1
+; CHECK-NEXT:    addsd %xmm2, %xmm1
+; CHECK-NEXT:    cvttsd2si %xmm1, %eax
+; CHECK-NEXT:    jmp .LBB0_1
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %b.0 = phi i16 [ 0, %entry ], [ %conv77, %for.cond ]
+  %mul18 = mul i16 %b.0, %b.0
+  %arrayidx.real = load double, double* undef, align 1
+  %arrayidx.imag = load double, double* undef, align 1
+  %mul_ac = fmul fast double %arrayidx.real, %arrayidx.real
+  %0 = fadd fast double 0.000000e+00, %arrayidx.real
+  %sub.r = fsub fast double %mul_ac, %0
+  %sub.i = fsub fast double 0.000000e+00, %arrayidx.imag
+  %conv28 = sitofp i16 %mul18 to double
+  %mul_bc32 = fmul fast double %arrayidx.imag, %conv28
+  %mul_bd46 = fmul fast double %mul_bc32, %arrayidx.imag
+  %mul_r49 = fsub fast double 0.000000e+00, %mul_bd46
+  %mul_ac59 = fmul fast double %mul_r49, %sub.r
+  %mul_bc48 = fmul fast double %mul_bc32, %arrayidx.real
+  %mul_i50 = fadd fast double 0.000000e+00, %mul_bc48
+  %1 = fmul fast double %mul_i50, %sub.i
+  %.neg = fneg fast double %0
+  %.neg19 = fmul fast double %1, -1.000000e+00
+  %.neg20 = fadd fast double %.neg, %mul_ac
+  %2 = fadd fast double %.neg20, %mul_ac59
+  %sub.r75 = fadd fast double %2, %.neg19
+  %conv77 = fptosi double %sub.r75 to i16
+  br label %for.cond
+}

From 15d1ee36720ff24323f55452ae3cfb63f318c3f3 Mon Sep 17 00:00:00 2001
From: Raul Tambre <raul.tambre@cleveron.com>
Date: Sat, 6 Mar 2021 11:45:57 +0200
Subject: [PATCH 186/244] [CMake][compiler-rt] Use copying instead of
 symlinking for LSE builtins on non-Unix-likes

As reported in post-review comments on D93278, symlinking requires privilege escalation on Windows.
Copying is functionally the same, so fall back to it for systems that aren't Unix-like.
This is similar to the solution in AddLLVM.cmake.

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D98111

(cherry picked from commit ba860963b156db3b653c67ef044df877f3cea9cc)
---
 compiler-rt/lib/builtins/CMakeLists.txt | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index b511a9a987b3..73b6bead8424 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -515,6 +515,12 @@ set(aarch64_SOURCES
 set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir")
 file(MAKE_DIRECTORY "${OA_HELPERS_DIR}")
 
+if(CMAKE_HOST_UNIX)
+  set(COMPILER_RT_LINK_OR_COPY create_symlink)
+else()
+  set(COMPILER_RT_LINK_OR_COPY copy)
+endif()
+
 foreach(pat cas swp ldadd ldclr ldeor ldset)
   foreach(size 1 2 4 8 16)
     foreach(model 1 2 3 4)
@@ -522,7 +528,7 @@ foreach(pat cas swp ldadd ldclr ldeor ldset)
         set(helper_asm "${OA_HELPERS_DIR}/outline_atomic_${pat}${size}_${model}.S")
         add_custom_command(
           OUTPUT ${helper_asm}
-          COMMAND ${CMAKE_COMMAND} -E create_symlink "${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S" "${helper_asm}"
+          COMMAND ${CMAKE_COMMAND} -E ${COMPILER_RT_LINK_OR_COPY} "${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S" "${helper_asm}"
         )
         set_source_files_properties("${helper_asm}"
           PROPERTIES

From c016eda3257eb0f67a989065d174bc5e13ed7096 Mon Sep 17 00:00:00 2001
From: Amilendra Kodithuwakku <amilendra.kodithuwakku@arm.com>
Date: Fri, 12 Mar 2021 19:19:29 +0000
Subject: [PATCH 187/244] [release][docs] List all cores Arm has added support
 for in LLVM 12.

Reviewed By: kristof.beyls

Differential Revision: https://reviews.llvm.org/D98277
---
 clang/docs/ReleaseNotes.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7f4b675b68f9..46e11fcb31cb 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -139,6 +139,15 @@ Modified Compiler Flags
   This behavior matches newer GCC.
   (`D91760 <https://reviews.llvm.org/D91760>`_)
   (`D92054 <https://reviews.llvm.org/D92054>`_)
+- Support has been added for the following processors (command-line identifiers
+  in parentheses):
+  - Arm Cortex-A78C (cortex-a78c).
+  - Arm Cortex-R82 (cortex-r82).
+  - Arm Neoverse V1 (neoverse-v1).
+  - Arm Neoverse N2 (neoverse-n2).
+  - Fujitsu A64FX (a64fx).
+  For example, to select architecture support and tuning for Neoverse-V1 based
+  systems, use ``-mcpu=neoverse-v1``.
 
 Removed Compiler Flags
 -------------------------

From ca14f0282fcec0324b921d27907a704b3a156d0f Mon Sep 17 00:00:00 2001
From: Amilendra Kodithuwakku <amilendra.kodithuwakku@arm.com>
Date: Fri, 12 Mar 2021 20:03:23 +0000
Subject: [PATCH 188/244] [release][docs] List all cores Arm has added support
 for in LLVM 12.

Add new-line before sub-list for proper rendering.

Differential Revision: https://reviews.llvm.org/D98277
---
 clang/docs/ReleaseNotes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 46e11fcb31cb..451bc65b9f5b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -141,6 +141,7 @@ Modified Compiler Flags
   (`D92054 <https://reviews.llvm.org/D92054>`_)
 - Support has been added for the following processors (command-line identifiers
   in parentheses):
+
   - Arm Cortex-A78C (cortex-a78c).
   - Arm Cortex-R82 (cortex-r82).
   - Arm Neoverse V1 (neoverse-v1).

From 00441b8f4e5b7daa39ac6cbeb45ebfe54662b08d Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Tue, 16 Mar 2021 12:07:15 +0000
Subject: [PATCH 189/244] [OpenCL][Docs] Release notes

Differential Revision: https://reviews.llvm.org/D98076
---
 clang/docs/ReleaseNotes.rst | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 451bc65b9f5b..b35d81c60b7b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -210,10 +210,38 @@ C++1z Feature Support
 Objective-C Language Changes in Clang
 -------------------------------------
 
-OpenCL C Language Changes in Clang
-----------------------------------
-
-...
+OpenCL Kernel Language Changes in Clang
+---------------------------------------
+
+- Improved online documentation: :doc:`UsersManual` and :doc:`OpenCLSupport`
+  pages.
+- Added ``-cl-std=CL3.0`` and predefined version macro for OpenCL 3.0.
+- Added ``-cl-std=CL1.0`` and mapped to the existing OpenCL 1.0 functionality.
+- Improved OpenCL extension handling per target.
+- Added clang extension for function pointers ``__cl_clang_function_pointers``
+  and variadic functions ``__cl_clang_variadic_functions``, more details can be
+  found in :doc:`LanguageExtensions`.
+- Removed extensions without kernel language changes:
+  ``cl_khr_select_fprounding_mode``, ``cl_khr_gl_sharing``, ``cl_khr_icd``,
+  ``cl_khr_gl_event``, ``cl_khr_d3d10_sharing``, ``cl_khr_context_abort``,
+  ``cl_khr_d3d11_sharing``, ``cl_khr_dx9_media_sharing``,
+  ``cl_khr_image2d_from_buffer``, ``cl_khr_initialize_memory``,
+  ``cl_khr_gl_depth_images``, ``cl_khr_spir``, ``cl_khr_egl_event``,
+  ``cl_khr_egl_image``, ``cl_khr_terminate_context``.
+- Improved diagnostics for  unevaluated ``vec_step`` expression.
+- Allow nested pointers (e.g. pointer-to-pointer) kernel arguments beyond OpenCL
+  1.2.
+- Added ``global_device`` and ``global_host`` address spaces for USM
+  allocations.
+
+Miscellaneous improvements in C++ for OpenCL support:
+
+- Added diagnostics for pointers to member functions and references to
+  functions.
+- Added support of ``vec_step`` builtin.
+- Fixed ICE on address spaces with forwarding references and templated copy
+  constructors.
+- Removed warning for variadic macro use.
 
 ABI Changes in Clang
 --------------------

From e3186ba0f3b5a5cf2a42155ff5ee8350cbda1486 Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
Date: Thu, 18 Mar 2021 16:08:58 +0000
Subject: [PATCH 190/244] [aarch64][WOA64][docs] Release note for WoA-hosted
 LLVM 12 binary

Reviewed By: peterwaller-arm

Differential Revision: https://reviews.llvm.org/D98415
---
 clang/docs/ReleaseNotes.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b35d81c60b7b..f3499d167361 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -193,6 +193,13 @@ Windows Support
   exception. To workaround (with reduced security), compile with
   /guard:cf,nolongjmp.
 
+- Windows on Arm64: LLVM 12 adds official binary release hosted on
+  Windows on Arm64.  The binary is built and tested by Linaro alongside
+  AArch64 and ARM 32-bit Linux binary releases.  This first WoA release
+  includes Clang compiler, LLD Linker, and compiler-rt runtime libraries.
+  Work on LLDB, sanitizer support, OpenMP, and other features is in progress
+  and will be included in future Windows on Arm64 LLVM releases.
+
 C Language Changes in Clang
 ---------------------------
 

From 4990141a4366eb00abdc8252d7cbb8adeacb9954 Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
Date: Fri, 19 Mar 2021 13:37:19 +0000
Subject: [PATCH 191/244] [WoA][MSVC] Use default linker setting in
 MSVC-compatible driver [take 2]

At the moment "link.exe" is hard-coded as default linker in MSVC.cpp,
so there's no way to use LLD as default linker for MSVC driver.

This patch adds checking of CLANG_DEFAULT_LINKER to MSVC.cpp and
updates unit-tests that expect link.exe linker to explicitly select it
via -fuse-ld=link, so that buildbots and other builds that set
-DCLANG_DEFAULT_LINKER=foobar don't fail these tests.

This is a squash of
- https://reviews.llvm.org/D98493 (MSVC.cpp change) and
- https://reviews.llvm.org/D98862 (unit-tests change)

Fixes https://bugs.llvm.org/show_bug.cgi?id=49624

Reviewed By: maxim-kuvyrkov

Differential Revision: https://reviews.llvm.org/D98935

(cherry-picked from commit 2049fe58903b68f66872a18e608f40e5233b55fb)
---
 clang/lib/Driver/ToolChains/MSVC.cpp |  6 +++++-
 clang/test/Driver/Xlinker-args.c     |  2 +-
 clang/test/Driver/cl-inputs.c        |  6 +++---
 clang/test/Driver/cl-link-at-file.c  |  2 +-
 clang/test/Driver/cl-link.c          | 22 +++++++++++-----------
 clang/test/Driver/msvc-link.c        |  8 ++++----
 clang/test/OpenMP/linking.c          |  4 ++--
 7 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index f4b7a57e0bb7..13943b6c404a 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -11,6 +11,7 @@
 #include "Darwin.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Version.h"
+#include "clang/Config/config.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
@@ -520,7 +521,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   // translate 'lld' into 'lld-link', and in the case of the regular msvc
   // linker, we need to use a special search algorithm.
   llvm::SmallString<128> linkPath;
-  StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ, "link");
+  StringRef Linker
+    = Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER);
+  if (Linker.empty())
+    Linker = "link";
   if (Linker.equals_lower("lld"))
     Linker = "lld-link";
 
diff --git a/clang/test/Driver/Xlinker-args.c b/clang/test/Driver/Xlinker-args.c
index a44957cd8aef..cb045a1d40ac 100644
--- a/clang/test/Driver/Xlinker-args.c
+++ b/clang/test/Driver/Xlinker-args.c
@@ -17,7 +17,7 @@
 // LINUX: "--no-demangle" "-e" "_start" "one" "two" "three" "four" "-z" "five" "-r" {{.*}} "-T" "a.lds"
 
 // Check that we forward '-Xlinker' and '-Wl,' on Windows.
-// RUN: %clang -target i686-pc-win32 -### \
+// RUN: %clang -target i686-pc-win32 -fuse-ld=link -### \
 // RUN:   -Xlinker one -Wl,two %s 2>&1 | \
 // RUN:   FileCheck -check-prefix=WIN %s
 // WIN: link.exe
diff --git a/clang/test/Driver/cl-inputs.c b/clang/test/Driver/cl-inputs.c
index 59455a0aa5e5..8eb44517ee16 100644
--- a/clang/test/Driver/cl-inputs.c
+++ b/clang/test/Driver/cl-inputs.c
@@ -50,16 +50,16 @@
 // RUN: %clang_cl -### /Tc - 2>&1 | FileCheck -check-prefix=STDINTc %s
 // STDINTc: "-x" "c"
 
-// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s
+// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test.lib 2>&1 | FileCheck -check-prefix=LIBINPUT %s
 // LIBINPUT: link.exe"
 // LIBINPUT: "cl-test.lib"
 
-// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 %s
+// RUN: env LIB=%S/Inputs/cl-libs %clang_cl -fuse-ld=link -### -- %s cl-test2.lib 2>&1 | FileCheck -check-prefix=LIBINPUT2 %s
 // LIBINPUT2: error: no such file or directory: 'cl-test2.lib'
 // LIBINPUT2: link.exe"
 // LIBINPUT2-NOT: "cl-test2.lib"
 
-// RUN: %clang_cl -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s
+// RUN: %clang_cl -fuse-ld=link -### -- %s /nonexisting.lib 2>&1 | FileCheck -check-prefix=LIBINPUT3 %s
 // LIBINPUT3: error: no such file or directory: '/nonexisting.lib'
 // LIBINPUT3: link.exe"
 // LIBINPUT3-NOT: "/nonexisting.lib"
diff --git a/clang/test/Driver/cl-link-at-file.c b/clang/test/Driver/cl-link-at-file.c
index 50ae07fadf5b..4e665f89b74e 100644
--- a/clang/test/Driver/cl-link-at-file.c
+++ b/clang/test/Driver/cl-link-at-file.c
@@ -7,7 +7,7 @@
 
 // RUN: echo /link bar.lib baz.lib > %t.args
 // RUN: touch %t.obj
-// RUN: %clang_cl -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS
+// RUN: %clang_cl -fuse-ld=link -### @%t.args -- %t.obj 2>&1 | FileCheck %s -check-prefix=ARGS
 // If the "/link" option captures all remaining args beyond its response file,
 // it will also capture "--" and our input argument. In this case, Clang will
 // be clueless and will emit "argument unused" warnings. If PR17239 is properly
diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c
index 142725fed8eb..e2f5397e9133 100644
--- a/clang/test/Driver/cl-link.c
+++ b/clang/test/Driver/cl-link.c
@@ -2,14 +2,14 @@
 // be interpreted as a command-line option, e.g. on Mac where %s is commonly
 // under /Users.
 
-// RUN: %clang_cl /Tc%s -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
-// RUN: %clang_cl /Tc%s -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
+// RUN: %clang_cl /Tc%s -fuse-ld=link -### /link foo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
+// RUN: %clang_cl /Tc%s -fuse-ld=link -### /linkfoo bar baz 2>&1 | FileCheck --check-prefix=LINK %s
 // LINK: link.exe
 // LINK: "foo"
 // LINK: "bar"
 // LINK: "baz"
 
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN %s
 // ASAN: link.exe
 // ASAN: "-debug"
 // ASAN: "-incremental:no"
@@ -19,7 +19,7 @@
 // ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx-i386.lib"
 // ASAN: "{{.*}}cl-link{{.*}}.obj"
 
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s
 // ASAN-MD: link.exe
 // ASAN-MD: "-debug"
 // ASAN-MD: "-incremental:no"
@@ -29,13 +29,13 @@
 // ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib"
 // ASAN-MD: "{{.*}}cl-link{{.*}}.obj"
 
-// RUN: %clang_cl /LD -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
-// RUN: %clang_cl /LDd -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
+// RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
+// RUN: %clang_cl /LDd -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s
 // DLL: link.exe
 // "-dll"
 
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
-// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
+// RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /LDd /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-DLL %s
 // ASAN-DLL: link.exe
 // ASAN-DLL: "-dll"
 // ASAN-DLL: "-debug"
@@ -43,13 +43,13 @@
 // ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk-i386.lib"
 // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj"
 
-// RUN: %clang_cl /Zi /Tc%s -### 2>&1 | FileCheck --check-prefix=DEBUG %s
+// RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s
 // DEBUG: link.exe
 // DEBUG: "-debug"
 
 // PR27234
-// RUN: %clang_cl /Tc%s nonexistent.obj -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
-// RUN: %clang_cl /Tc%s nonexistent.lib -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
+// RUN: %clang_cl /Tc%s nonexistent.obj -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
+// RUN: %clang_cl /Tc%s nonexistent.lib -fuse-ld=link -### /link /libpath:somepath 2>&1 | FileCheck --check-prefix=NONEXISTENT %s
 // NONEXISTENT-NOT: no such file
 // NONEXISTENT: link.exe
 // NONEXISTENT: "/libpath:somepath"
diff --git a/clang/test/Driver/msvc-link.c b/clang/test/Driver/msvc-link.c
index 13dccd21bfd8..1ee17fc63c32 100644
--- a/clang/test/Driver/msvc-link.c
+++ b/clang/test/Driver/msvc-link.c
@@ -1,4 +1,4 @@
-// RUN: %clang -target i686-pc-windows-msvc -### %s 2>&1 | FileCheck --check-prefix=BASIC %s
+// RUN: %clang -target i686-pc-windows-msvc -fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=BASIC %s
 // BASIC: link.exe"
 // BASIC: "-out:a.exe"
 // BASIC: "-defaultlib:libcmt"
@@ -6,7 +6,7 @@
 // BASIC: "-nologo"
 // BASIC-NOT: "-Brepro"
 
-// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll -### %s 2>&1 | FileCheck --check-prefix=DLL %s
+// RUN: %clang -target i686-pc-windows-msvc -shared -o a.dll -fuse-ld=link -### %s 2>&1 | FileCheck --check-prefix=DLL %s
 // DLL: link.exe"
 // DLL: "-out:a.dll"
 // DLL: "-defaultlib:libcmt"
@@ -19,13 +19,13 @@
 // LIBPATH: "-libpath:/usr/lib"
 // LIBPATH: "-nologo"
 
-// RUN: %clang_cl /Brepro -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s
+// RUN: %clang_cl /Brepro -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=REPRO %s
 // REPRO: link.exe"
 // REPRO: "-out:msvc-link.exe"
 // REPRO: "-nologo"
 // REPRO: "-Brepro"
 
-// RUN: %clang_cl /Brepro- -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s
+// RUN: %clang_cl /Brepro- -fuse-ld=link -### -- %s 2>&1 | FileCheck --check-prefix=NOREPRO %s
 // NOREPRO: link.exe"
 // NOREPRO: "-out:msvc-link.exe"
 // NOREPRO: "-nologo"
diff --git a/clang/test/OpenMP/linking.c b/clang/test/OpenMP/linking.c
index 802553c1be75..1c4439626470 100644
--- a/clang/test/OpenMP/linking.c
+++ b/clang/test/OpenMP/linking.c
@@ -81,7 +81,7 @@
 // CHECK-LD-OVERRIDE-64: "-lgomp" "-lrt"
 // CHECK-LD-OVERRIDE-64: "-lpthread" "-lc"
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -no-canonical-prefixes -fuse-ld=link %s -### -o %t.o 2>&1 \
 // RUN:     -fopenmp=libomp -target x86_64-msvc-win32 -rtlib=platform \
 // RUN:   | FileCheck --check-prefix=CHECK-MSVC-LINK-64 %s
 // CHECK-MSVC-LINK-64: link.exe
@@ -95,7 +95,7 @@
 // SIMD-ONLY11-NOT: libomp
 // SIMD-ONLY11-NOT: libgomp
 //
-// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: %clang -no-canonical-prefixes %s -fuse-ld=link -### -o %t.o 2>&1 \
 // RUN:     -fopenmp=libiomp5 -target x86_64-msvc-win32 -rtlib=platform \
 // RUN:   | FileCheck --check-prefix=CHECK-MSVC-ILINK-64 %s
 

From f4c01f33f450f654a63363b4eb84bf744c24959c Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Wed, 24 Mar 2021 12:28:00 -0400
Subject: [PATCH 192/244] [Support] Fix 'keeping' temporary files on Windows 7

As reported here: https://bugs.llvm.org/show_bug.cgi?id=48378#c0
and here: https://github.com/rust-lang/rust/issues/81051
since 79657e2339b58bc01fe1b85a448bb073d57d90bb, some programs such as llvm-ar
don't work properly on Windows 7.

The issue is shown in the snippet by Oleksandr Prodan:
https://pastebin.com/v51m3uBU

In essence, once the 'DeleteFile' flag has been set on FILE_DISPOSITION_INFO,
the file path can't be queried anymore with GetFinalPathNameByHandleW. On
Windows 10, however, this works: GetFinalPathNameByHandleW returns successfully.

To work around the issue, we simply reset the 'DeleteFile' flag before even
checking if we're dealing with a network file.

Tested with `llvm-ar r empty.a a.obj` run on a network mount. At the moment, we
cannot add specific test coverage for this, since it requires mounting a
network drive.

(cherry picked from commit 64ab2b6825c5aeae6e4afa7ef0829b89a6828102)
---
 llvm/lib/Support/Windows/Path.inc | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index dc9bcf868381..adcbd1b5f8f3 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -402,8 +402,22 @@ std::error_code is_local(int FD, bool &Result) {
 }
 
 static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
-  // First, check if the file is on a network (non-local) drive. If so, don't
-  // set DeleteFile to true, since it prevents opening the file for writes.
+  // Clear the FILE_DISPOSITION_INFO flag first, before checking if it's a
+  // network file. On Windows 7 the function realPathFromHandle() below fails
+  // if the FILE_DISPOSITION_INFO flag was already set to 'DeleteFile = true' by
+  // a prior call.
+  FILE_DISPOSITION_INFO Disposition;
+  Disposition.DeleteFile = false;
+  if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
+                                  sizeof(Disposition)))
+    return mapWindowsError(::GetLastError());
+  if (!Delete)
+    return std::error_code();
+
+  // Check if the file is on a network (non-local) drive. If so, don't
+  // continue when DeleteFile is true, since it prevents opening the file for
+  // writes. Note -- this will leak temporary files on disk, but only when the
+  // target file is on a network drive.
   SmallVector<wchar_t, 128> FinalPath;
   if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
     return EC;
@@ -415,9 +429,9 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
   if (!IsLocal)
     return std::error_code();
 
-  // The file is on a local drive, set the DeleteFile to true.
-  FILE_DISPOSITION_INFO Disposition;
-  Disposition.DeleteFile = Delete;
+  // The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
+  // flag.
+  Disposition.DeleteFile = true;
   if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition,
                                   sizeof(Disposition)))
     return mapWindowsError(::GetLastError());

From e94372d1b395a6461e7d973917b3a3c29563a5e6 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 12 Mar 2021 07:56:54 -0500
Subject: [PATCH 193/244] [SimplifyCFG] avoid sinking insts within an
 infinite-loop

The test is reduced from a C source example in:
https://llvm.org/PR49541

It's possible that the test could be reduced further or
the predicate generalized further, but it seems to require
a few ingredients (including the "late" SimplifyCFG options
on the RUN line) to fall into the infinite-loop trap.

(cherry picked from commit bd197ed0a57a82187ed3c6265ca811d412acfaef)
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 19 ++++---
 .../Transforms/SimplifyCFG/sink-inf-loop.ll   | 49 +++++++++++++++++++
 2 files changed, 61 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 7cfe17618cde..de9560df9785 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1628,6 +1628,11 @@ static bool canSinkInstructions(
         I->getType()->isTokenTy())
       return false;
 
+    // Do not try to sink an instruction in an infinite loop - it can cause
+    // this algorithm to infinite loop.
+    if (I->getParent()->getSingleSuccessor() == I->getParent())
+      return false;
+
     // Conservatively return false if I is an inline-asm instruction. Sinking
     // and merging inline-asm instructions can potentially create arguments
     // that cannot satisfy the inline-asm constraints.
@@ -1714,13 +1719,13 @@ static bool canSinkInstructions(
   return true;
 }
 
-// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
+// Assuming canSinkInstructions(Blocks) has returned true, sink the last
 // instruction of every block in Blocks to their common successor, commoning
 // into one instruction.
 static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
 
-  // canSinkLastInstruction returning true guarantees that every block has at
+  // canSinkInstructions returning true guarantees that every block has at
   // least one non-terminator instruction.
   SmallVector<Instruction*,4> Insts;
   for (auto *BB : Blocks) {
@@ -1733,9 +1738,9 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   }
 
   // The only checking we need to do now is that all users of all instructions
-  // are the same PHI node. canSinkLastInstruction should have checked this but
-  // it is slightly over-aggressive - it gets confused by commutative instructions
-  // so double-check it here.
+  // are the same PHI node. canSinkInstructions should have checked this but
+  // it is slightly over-aggressive - it gets confused by commutative
+  // instructions so double-check it here.
   Instruction *I0 = Insts.front();
   if (!I0->user_empty()) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
@@ -1746,11 +1751,11 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
       return false;
   }
 
-  // We don't need to do any more checking here; canSinkLastInstruction should
+  // We don't need to do any more checking here; canSinkInstructions should
   // have done it all for us.
   SmallVector<Value*, 4> NewOperands;
   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
-    // This check is different to that in canSinkLastInstruction. There, we
+    // This check is different to that in canSinkInstructions. There, we
     // cared about the global view once simplifycfg (and instcombine) have
     // completed - it takes into account PHIs that become trivially
     // simplifiable.  However here we need a more local view; if an operand
diff --git a/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll
new file mode 100644
index 000000000000..37399367efce
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/sink-inf-loop.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -sink-common-insts=true -S | FileCheck %s
+
+; This would infinite-loop because we allowed code sinking to examine an infinite-loop block (%j).
+
+define void @PR49541(i32* %t1, i32 %a, i1 %bool) {
+; CHECK-LABEL: @PR49541(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[I:%.*]]
+; CHECK:       j:
+; CHECK-NEXT:    [[T3:%.*]] = phi i32 [ [[B:%.*]], [[J:%.*]] ], [ [[A:%.*]], [[COND_TRUE:%.*]] ], [ [[A]], [[COND_FALSE:%.*]] ]
+; CHECK-NEXT:    [[T2:%.*]] = phi i32 [ [[T2]], [[J]] ], [ [[PRE2:%.*]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
+; CHECK-NEXT:    [[B]] = load i32, i32* [[T1:%.*]], align 4
+; CHECK-NEXT:    br label [[J]]
+; CHECK:       i:
+; CHECK-NEXT:    [[G_1:%.*]] = phi i16 [ undef, [[ENTRY:%.*]] ], [ [[G_1]], [[COND_FALSE]] ]
+; CHECK-NEXT:    br i1 [[BOOL:%.*]], label [[COND_FALSE]], label [[COND_TRUE]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[TOBOOL9_NOT:%.*]] = icmp eq i16 [[G_1]], 0
+; CHECK-NEXT:    [[PRE2]] = load i32, i32* [[T1]], align 4
+; CHECK-NEXT:    br label [[J]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[T1]], align 4
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i32 [[T5]], 0
+; CHECK-NEXT:    br i1 [[B2]], label [[J]], label [[I]]
+;
+entry:
+  br label %i
+
+j:
+  %t3 = phi i32 [ %b, %j ], [ %a, %cond.true ], [ %a, %cond.false ]
+  %t2 = phi i32 [ %t2, %j ], [ %pre2, %cond.true ], [ 0, %cond.false ]
+  %b = load i32, i32* %t1, align 4
+  br label %j
+
+i:
+  %g.1 = phi i16 [ undef, %entry ], [ %g.1, %cond.false ]
+  br i1 %bool, label %cond.false, label %cond.true
+
+cond.true:
+  %tobool9.not = icmp eq i16 %g.1, 0
+  %pre2 = load i32, i32* %t1, align 4
+  br label %j
+
+cond.false:
+  %t5 = load i32, i32* %t1, align 4
+  %b2 = icmp eq i32 %t5, 0
+  br i1 %b2, label %j, label %i
+}

From f43958b7c497c526b238607624ee0069888f4c98 Mon Sep 17 00:00:00 2001
From: Shilei Tian <tianshilei1992@gmail.com>
Date: Thu, 18 Mar 2021 18:25:21 -0400
Subject: [PATCH 194/244] [OpenMP] Fixed a crash in hidden helper thread

It is reported that after enabling hidden helper thread, the program
can hit the assertion `new_gtid < __kmp_threads_capacity` sometimes. The root
cause is explained as follows. Let's say the default `__kmp_threads_capacity` is
`N`. If hidden helper thread is enabled, `__kmp_threads_capacity` will be offset
to `N+8` by default. If the number of threads we need exceeds `N+8`, e.g. via
`num_threads` clause, we need to expand `__kmp_threads`. In
`__kmp_expand_threads`, the expansion starts from `__kmp_threads_capacity` and
repeatedly doubles it until the new capacity meets the requirement. Let's
assume the new requirement is `Y`. If `Y` happens to meet the constraint
`(N+8)*2^X=Y`, where `X` is the number of iterations, the new capacity is not
enough because 8 of its slots are reserved for hidden helper threads.

Here is an example.
```
#include <vector>

int main(int argc, char *argv[]) {
  constexpr const size_t N = 1344;
  std::vector<int> data(N);

#pragma omp parallel for
  for (unsigned i = 0; i < N; ++i) {
    data[i] = i;
  }

#pragma omp parallel for num_threads(N)
  for (unsigned i = 0; i < N; ++i) {
    data[i] += i;
  }

  return 0;
}
```
My CPU is 20C40T, so `__kmp_threads_capacity` is 160. After the offset,
`__kmp_threads_capacity` becomes 168. Since `1344 = (160+8)*2^3`, the assertion
is hit.

Reviewed By: protze.joachim

Differential Revision: https://reviews.llvm.org/D98838

(cherry picked from commit 2df65f87c1ea81008768e14522e5d9277234ba70)
---
 openmp/runtime/src/kmp_runtime.cpp            | 15 ++++++-
 openmp/runtime/src/kmp_settings.cpp           |  7 +--
 .../capacity_mix_threads.cpp                  | 45 +++++++++++++++++++
 .../hidden_helper_task/capacity_nthreads.cpp  | 31 +++++++++++++
 4 files changed, 94 insertions(+), 4 deletions(-)
 create mode 100644 openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
 create mode 100644 openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp

diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index a6e32bd008e1..b981f8740dbe 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
   if (TCR_PTR(__kmp_threads[0]) == NULL) {
     --capacity;
   }
+  // If it is not for initializing the hidden helper team, we need to take
+  // __kmp_hidden_helper_threads_num out of the capacity because it is included
+  // in __kmp_threads_capacity.
+  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+    capacity -= __kmp_hidden_helper_threads_num;
+  }
   if (__kmp_nth + new_nthreads -
           (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
       capacity) {
@@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) {
     --capacity;
   }
 
+  // If it is not for initializing the hidden helper team, we need to take
+  // __kmp_hidden_helper_threads_num out of the capacity because it is included
+  // in __kmp_threads_capacity.
+  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+    capacity -= __kmp_hidden_helper_threads_num;
+  }
+
   /* see if there are too many threads */
   if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
     if (__kmp_tp_cached) {
@@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) {
     /* find an available thread slot */
     // Don't reassign the zero slot since we need that to only be used by
     // initial thread. Slots for hidden helper threads should also be skipped.
-    if (initial_thread && __kmp_threads[0] == NULL) {
+    if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
       gtid = 0;
     } else {
       for (gtid = __kmp_hidden_helper_threads_num + 1;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b477edbbfb42..50f6a05faaf5 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) {
     nth = (4 * __kmp_xproc);
 
   // If hidden helper task is enabled, we initialize the thread capacity with
-  // extra
-  // __kmp_hidden_helper_threads_num.
-  nth += __kmp_hidden_helper_threads_num;
+  // extra __kmp_hidden_helper_threads_num.
+  if (__kmp_enable_hidden_helper) {
+    nth += __kmp_hidden_helper_threads_num;
+  }
 
   if (nth > __kmp_max_nth)
     nth = __kmp_max_nth;
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
new file mode 100644
index 000000000000..776aee9d8e2c
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
@@ -0,0 +1,50 @@
+// RUN: %libomp-cxx-compile-and-run
+
+// Runs two parallel loops requesting N and then N + 1 threads after starting
+// a second std::thread that also touches the OpenMP runtime, then checks that
+// every element was updated exactly once by each loop.
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <limits>
+#include <thread>
+#include <vector>
+
+void dummy_root() {
+  // omp_get_max_threads() will do middle initialization
+  (void)omp_get_max_threads();
+  std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+}
+
+int main(int argc, char *argv[]) {
+  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                                  4 * omp_get_num_procs()),
+                         std::numeric_limits<int>::max());
+
+  std::vector<int> data(N);
+
+  // Create a new thread to initialize the OpenMP RTL. The new thread will not
+  // be taken as the "initial thread".
+  std::thread root(dummy_root);
+
+#pragma omp parallel for num_threads(N)
+  for (int i = 0; i < N; ++i) {
+    data[i] = i;
+  }
+
+#pragma omp parallel for num_threads(N + 1)
+  for (int i = 0; i < N; ++i) {
+    data[i] += i;
+  }
+
+  for (int i = 0; i < N; ++i) {
+    assert(data[i] == 2 * i);
+  }
+
+  root.join();
+
+  return 0;
+}
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
new file mode 100644
index 000000000000..a9d394f729e9
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
@@ -0,0 +1,35 @@
+// RUN: %libomp-cxx-compile-and-run
+
+// Runs two parallel loops requesting N and then N + 1 threads and checks that
+// every element was updated exactly once by each loop.
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+  const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+                                  4 * omp_get_num_procs()),
+                         std::numeric_limits<int>::max());
+
+  std::vector<int> data(N);
+
+#pragma omp parallel for num_threads(N)
+  for (int i = 0; i < N; ++i) {
+    data[i] = i;
+  }
+
+#pragma omp parallel for num_threads(N + 1)
+  for (int i = 0; i < N; ++i) {
+    data[i] += i;
+  }
+
+  for (int i = 0; i < N; ++i) {
+    assert(data[i] == 2 * i);
+  }
+
+  return 0;
+}

From 8ca56905dd9bdade269b5bc91528495884b62bf5 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Wed, 10 Mar 2021 14:37:09 +0100
Subject: [PATCH 195/244] [PowerPC] Fix infinite loop in peephole CR
 optimization (PR49509)

If we encounter a degenerate select node where both operands are
the same, then we can continue negating the condition while swapping
operands, resulting in an infinite loop. Avoid this by bailing out
if both operands are the same.

Fixes https://bugs.llvm.org/show_bug.cgi?id=49509.

Differential Revision: https://reviews.llvm.org/D98340

(cherry picked from commit 2489cbaa8057c736475fd88990f4f6dbf022873d)
---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp |  6 ++
 llvm/test/CodeGen/PowerPC/pr49509.ll        | 81 +++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/pr49509.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 693b0adaede4..2604218da160 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -5896,7 +5896,13 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
         User->getMachineOpcode() != PPC::SELECT_I8)
       return false;
 
+    SDNode *Op1 = User->getOperand(1).getNode();
     SDNode *Op2 = User->getOperand(2).getNode();
+    // If we have a degenerate select with two equal operands, swapping will
+    // not do anything, and we may run into an infinite loop.
+    if (Op1 == Op2)
+      return false;
+
     if (!Op2->isMachineOpcode())
       return false;
 
diff --git a/llvm/test/CodeGen/PowerPC/pr49509.ll b/llvm/test/CodeGen/PowerPC/pr49509.ll
new file mode 100644
index 000000000000..f13733c18047
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr49509.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s
+
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+
+define void @test() {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    bc 12, 20, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %bb2
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    lis 3, 256
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_2: # %bb1
+; CHECK-NEXT:    bclr 4, 20, 0
+; CHECK-NEXT:  # %bb.3: # %bb66
+; CHECK-NEXT:    lwz 4, 12(0)
+; CHECK-NEXT:    lwz 5, 8(0)
+; CHECK-NEXT:    lwz 6, 0(0)
+; CHECK-NEXT:    lwz 7, 4(0)
+; CHECK-NEXT:    lbz 3, 0(3)
+; CHECK-NEXT:    and 5, 5, 6
+; CHECK-NEXT:    and 4, 4, 7
+; CHECK-NEXT:    and 4, 4, 5
+; CHECK-NEXT:    cmpwi 3, 0
+; CHECK-NEXT:    lis 3, 256
+; CHECK-NEXT:    lis 7, 512
+; CHECK-NEXT:    bc 12, 2, .LBB0_4
+; CHECK-NEXT:    b .LBB0_5
+; CHECK-NEXT:  .LBB0_4: # %bb66
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:  .LBB0_5: # %bb66
+; CHECK-NEXT:    cmpwi 1, 4, -1
+; CHECK-NEXT:    cmpwi 5, 4, -1
+; CHECK-NEXT:    li 6, 0
+; CHECK-NEXT:    bc 12, 6, .LBB0_6
+; CHECK-NEXT:    b .LBB0_7
+; CHECK-NEXT:  .LBB0_6: # %bb66
+; CHECK-NEXT:    addi 3, 7, 0
+; CHECK-NEXT:  .LBB0_7: # %bb66
+; CHECK-NEXT:    cror 20, 22, 2
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    bc 12, 20, .LBB0_9
+; CHECK-NEXT:  # %bb.8: # %bb66
+; CHECK-NEXT:    ori 3, 6, 0
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_9: # %bb66
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:  .LBB0_10: # %bb66
+; CHECK-NEXT:    stw 3, 0(3)
+; CHECK-NEXT:    blr
+bb:
+  br i1 undef, label %bb2, label %bb1
+
+bb2:                                              ; preds = %bb
+  %i = select i1 undef, i64 0, i64 72057594037927936
+  store i64 %i, i64* undef, align 8
+  ret void
+
+bb1:                                              ; preds = %bb
+  %i50 = load i8, i8* undef, align 8
+  %i52 = load i128, i128* null, align 8
+  %i62 = icmp eq i8 %i50, 0
+  br i1 undef, label %bb66, label %bb64
+
+bb64:                                             ; preds = %bb63
+  ret void
+
+bb66:                                             ; preds = %bb63
+  %i67 = lshr i128 -1, 0
+  %i68 = xor i128 %i52, -1
+  %i69 = add i128 0, %i68
+  %i70 = and i128 %i67, %i69
+  %i71 = icmp eq i128 %i70, 0
+  %i74 = select i1 %i62, i64 0, i64 72057594037927936
+  %i75 = select i1 %i71, i64 144115188075855872, i64 %i74
+  store i64 %i75, i64* undef, align 8
+  ret void
+}

From e89cdf8937bb6017cc99b05823428dd2fd673368 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Wed, 10 Mar 2021 13:25:33 -0500
Subject: [PATCH 196/244] [OpenMP] Restore backwards compatibility for
 libomptarget

Summary:
The changes introduced in D87946 altered the API of libomptarget
functions. `__kmpc_push_target_tripcount` already existed in Clang 11.x,
but its new signature was not given a backward-compatible interface.
This change restores the old entry point; as a result, people who built
offloading programs with Clang 13.x or 12.x will need to recompile them.

Reviewed By: jdoerfert cchen

Differential Revision: https://reviews.llvm.org/D98358

(cherry picked from commit 807466ef28125cf7268c860b09d5563c9c93602a)
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp                  |  2 +-
 .../target_teams_distribute_parallel_for_codegen.cpp   |  4 ++--
 ...target_teams_distribute_parallel_for_if_codegen.cpp | 10 +++++-----
 ...get_teams_distribute_parallel_for_order_codegen.cpp |  2 +-
 ...rget_teams_distribute_parallel_for_simd_codegen.cpp |  2 +-
 ...t_teams_distribute_parallel_for_simd_if_codegen.cpp | 10 +++++-----
 clang/test/OpenMP/teams_distribute_codegen.cpp         |  2 +-
 .../OpenMP/teams_distribute_parallel_for_codegen.cpp   |  2 +-
 .../teams_distribute_parallel_for_simd_codegen.cpp     |  2 +-
 clang/test/OpenMP/teams_distribute_simd_codegen.cpp    |  2 +-
 llvm/include/llvm/Frontend/OpenMP/OMPKinds.def         |  4 ++--
 llvm/test/Transforms/OpenMP/add_attributes.ll          |  6 +++---
 openmp/libomptarget/include/omptarget.h                |  6 ++++--
 openmp/libomptarget/src/exports                        |  3 ++-
 openmp/libomptarget/src/interface.cpp                  |  7 ++++++-
 openmp/libomptarget/src/omptarget.cpp                  |  4 ++--
 16 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 57cc2d60e2af..83dfa0780547 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9892,7 +9892,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
       CGF.EmitRuntimeCall(
           OMPBuilder.getOrCreateRuntimeFunction(
-              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
+              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
           Args);
     }
   };
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
index 0229ace911f8..c0f53239aa13 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp
@@ -39,7 +39,7 @@
 
 #ifdef CK1
 
-// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount
+// HCK_NO_TGT-NOT: @__kmpc_push_target_tripcount_mapper
 
 // HCK1: define{{.*}} i32 @{{.+}}target_teams_fun{{.*}}(
 int target_teams_fun(int *g){
@@ -60,7 +60,7 @@ int target_teams_fun(int *g){
   // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
   // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
-  // HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}},
 
   // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
index 6650e0557511..efe7df819fb6 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -49,10 +49,10 @@ int Arg;
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #pragma omp target teams distribute parallel for
@@ -107,12 +107,12 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
index b2ab37f22ec3..b99ba9d38a43 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp
@@ -14,7 +14,7 @@
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: %0 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{.+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 0, i32 0)
 // CHECK: call void [[TARGET_OUTLINE:@.+]]()
 // CHECK: ret void
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
index e6049145702b..39ccb87462c0 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -60,7 +60,7 @@ int target_teams_fun(int *g){
 // HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
 // HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
 // HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
-// HCK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+// HCK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
 // HCK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}},
 
 // HCK1: call void @[[OFFL1:.+]](i{{32|64}} [[I_PAR]], i{{32|64}} [[N_PAR]], {{.+}}, i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]])
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index 8b0eaba07f1c..19dc15b94f64 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -43,10 +43,10 @@ int Arg;
 
 // CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
 void gtid_test() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
 #ifdef OMP5
@@ -110,12 +110,12 @@ int tmain(T Arg) {
 
 // CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
 int main() {
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_0:@.+]](
-// CHECK-NOT: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK-NOT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call void [[OFFLOADING_FUN_1:@.+]](
-// CHECK: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
+// CHECK: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 100)
 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}},
 // CHECK: call void [[OFFLOADING_FUN_2:@.+]](
 // CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp
index 5bbb100e669e..aab5cced4c70 100644
--- a/clang/test/OpenMP/teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_codegen.cpp
@@ -33,7 +33,7 @@ int teams_argument_global(int n) {
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}})
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index b63e5aeddb7a..8fa73e76009b 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -32,7 +32,7 @@ int teams_argument_global(int n){
   // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 {{.+}})
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index 3d479c4cd29d..9b3855c61759 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -33,7 +33,7 @@ int teams_argument_global(int n){
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
index fd1214d22ce9..6e5d06b0c568 100644
--- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -35,7 +35,7 @@ int teams_argument_global(int n) {
   // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
   // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
 
-  // CK1: call void @__kmpc_push_target_tripcount(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
+  // CK1: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @{{.+}}, i64 -1, i64 %{{.+}})
   // CK1: call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i8** null, i8** null, i32 {{.+}}, i32 1)
 
   // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 844046167975..75d360bf4237 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -375,7 +375,7 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
 __OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32,
           /* omp_allocator_handle_t */ VoidPtr)
 
-__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64)
+__OMP_RTL(__kmpc_push_target_tripcount_mapper, false, Void, IdentPtr, Int64, Int64)
 __OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
 __OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32,
@@ -844,7 +844,7 @@ __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {})
 __OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {})
 
-__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {})
+__OMP_RTL_ATTRS(__kmpc_push_target_tripcount_mapper, SetterAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {})
 __OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {})
diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll
index b294542667bd..8476f42dd529 100644
--- a/llvm/test/Transforms/OpenMP/add_attributes.ll
+++ b/llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -627,7 +627,7 @@ declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*)
 
 declare void @__kmpc_destroy_allocator(i32, i8*)
 
-declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64)
+declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
 
 declare i32 @__kmpc_warp_active_thread_mask()
 
@@ -1144,7 +1144,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
 ; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
 
 ; CHECK: ; Function Attrs: nounwind
-; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64)
+; CHECK-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
 
 ; CHECK: ; Function Attrs: convergent nounwind
 ; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
@@ -1669,7 +1669,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
 ; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
 
 ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly
-; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(%struct.ident_t*, i64, i64)
+; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64)
 
 ; OPTIMISTIC: ; Function Attrs: convergent nounwind
 ; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 46bb8206efa1..36c25c33798a 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -283,8 +283,10 @@ int __tgt_target_teams_nowait_mapper(
     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
     void *noAliasDepList);
 
-void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id,
-                                  uint64_t loop_tripcount);
+void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
+
+void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
+                                         uint64_t loop_tripcount);
 
 #ifdef __cplusplus
 }
diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index 5e09a088533d..b7fc1c8c3c86 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -25,6 +25,8 @@ VERS1.0 {
     __tgt_target_teams_nowait_mapper;
     __tgt_mapper_num_components;
     __tgt_push_mapper_component;
+    __kmpc_push_target_tripcount;
+    __kmpc_push_target_tripcount_mapper;
     omp_get_num_devices;
     omp_get_initial_device;
     omp_target_alloc;
@@ -34,7 +36,6 @@ VERS1.0 {
     omp_target_memcpy_rect;
     omp_target_associate_ptr;
     omp_target_disassociate_ptr;
-    __kmpc_push_target_tripcount;
   local:
     *;
 };
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 01f3715d6bcc..b97676a6981b 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -514,8 +514,13 @@ EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
       MapComponentInfoTy(base, begin, size, type, name));
 }
 
-EXTERN void __kmpc_push_target_tripcount(ident_t *loc, int64_t device_id,
+EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
                                          uint64_t loop_tripcount) {
+  __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
+}
+
+EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
+                                                uint64_t loop_tripcount) {
   TIMESCOPE_WITH_IDENT(loc);
   if (IsOffloadDisabled())
     return;
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 37150aae2fe6..af6f7d09a4a2 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -900,8 +900,8 @@ TableMap *getTableMap(void *HostPtr) {
 
 /// Get loop trip count
 /// FIXME: This function will not work right if calling
-/// __kmpc_push_target_tripcount in one thread but doing offloading in another
-/// thread, which might occur when we call task yield.
+/// __kmpc_push_target_tripcount_mapper in one thread but doing offloading in
+/// another thread, which might occur when we call task yield.
 uint64_t getLoopTripCount(int64_t DeviceId) {
   DeviceTy &Device = PM->Devices[DeviceId];
   uint64_t LoopTripCount = 0;

From f05b649610564b11c481a20598dbb3f532c4602a Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 7 Mar 2021 17:27:22 +0100
Subject: [PATCH 197/244] [FastISel] Don't trivially kill extractvalues
 (PR49467)

All extractvalues of the same value at the same index will map to
the same register, so even if one specific extractvalue only has
one use, we should not mark it as a trivial kill, as there may be
more extractvalues later.

Fixes https://bugs.llvm.org/show_bug.cgi?id=49467.

Differential Revision: https://reviews.llvm.org/D98145

(cherry picked from commit 55ae279ba7a5905f39ce3ae79eac7834a4a134cc)
---
 llvm/include/llvm/CodeGen/FastISel.h       |  5 +++-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 10 +++++---
 llvm/test/CodeGen/X86/pr49467.ll           | 27 ++++++++++++++++++++++
 3 files changed, 38 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/pr49467.ll

diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index 81c1d6aad49a..26bf4ab2618c 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -490,7 +490,10 @@ class FastISel {
   /// - \c Add has a constant operand.
   bool canFoldAddIntoGEP(const User *GEP, const Value *Add);
 
-  /// Test whether the given value has exactly one use.
+  /// Test whether the register associated with this value has exactly one use,
+  /// in which case that single use is killing. Note that multiple IR values
+  /// may map onto the same register, in which case this is not the same as
+  /// checking that an IR value has one use.
   bool hasTrivialKill(const Value *V);
 
   /// Create a machine mem operand from the given instruction.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 62f7f3d98ba6..0ff77d4ba1ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -261,12 +261,16 @@ bool FastISel::hasTrivialKill(const Value *V) {
     if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
       return false;
 
+  // Casts and extractvalues may be trivially coalesced by fast-isel.
+  if (I->getOpcode() == Instruction::BitCast ||
+      I->getOpcode() == Instruction::PtrToInt ||
+      I->getOpcode() == Instruction::IntToPtr ||
+      I->getOpcode() == Instruction::ExtractValue)
+    return false;
+
   // Only instructions with a single use in the same basic block are considered
   // to have trivial kills.
   return I->hasOneUse() &&
-         !(I->getOpcode() == Instruction::BitCast ||
-           I->getOpcode() == Instruction::PtrToInt ||
-           I->getOpcode() == Instruction::IntToPtr) &&
          cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
 }
 
diff --git a/llvm/test/CodeGen/X86/pr49467.ll b/llvm/test/CodeGen/X86/pr49467.ll
new file mode 100644
index 000000000000..9b3502552066
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr49467.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=x86_64 < %s | FileCheck %s
+
+declare { i8*, i64 } @get()
+
+declare void @use(i8*, i64)
+
+define void @test(i64* %p) nounwind {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    movq %rdi, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    callq get@PLT
+; CHECK-NEXT:    movq (%rsp), %rdi # 8-byte Reload
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:    movq %rsi, (%rdi)
+; CHECK-NEXT:    # implicit-def: $rdi
+; CHECK-NEXT:    callq use@PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+  %struct = call { i8*, i64 } @get()
+  %struct.1 = extractvalue { i8*, i64 } %struct, 1
+  store i64 %struct.1, i64* %p, align 8
+  %struct.2 = extractvalue { i8*, i64 } %struct, 1
+  call void @use(i8* undef, i64 %struct.2)
+  ret void
+}

From 79a79d1d01c4c206d8de3569c72747587d929770 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 14 Mar 2021 16:39:03 +0100
Subject: [PATCH 198/244] [X86] Add test for PR49587 (NFC)

Shows a miscompile with FastISel.

(cherry picked from commit 0d814ca0f02733d6581bf209fadbebf3035380e0)
---
 llvm/test/CodeGen/X86/pr49587.ll | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr49587.ll

diff --git a/llvm/test/CodeGen/X86/pr49587.ll b/llvm/test/CodeGen/X86/pr49587.ll
new file mode 100644
index 000000000000..343f1a0149c0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr49587.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -fast-isel -mtriple=x86_64-- < %s | FileCheck %s
+
+define i32 @test(i64 %arg) nounwind {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $1, %rdi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    jb .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %no_overflow
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:  .LBB0_2: # %merge
+; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    retq
+entry:
+  %usubo = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %arg, i64 1)
+  %overflow = extractvalue { i64, i1 } %usubo, 1
+  br i1 %overflow, label %merge, label %no_overflow
+
+no_overflow:
+  br label %merge
+
+merge:
+  %phi = phi i32 [ 1, %no_overflow ], [ 0, %entry ]
+  ret i32 %phi
+}
+
+declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)

From 38dd45b00431e2c065e172751492e0ded59e49e6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 14 Mar 2021 16:47:41 +0100
Subject: [PATCH 199/244] [X86][FastISel] Fix with.overflow eflags clobber
 (PR49587)

If the successor block has a phi node, then additional moves may
be inserted into predecessors, which may clobber eflags. Don't try
to fold the with.overflow result into the branch in that case.

This is done by explicitly checking for any phis in successor
blocks; I am not sure whether there is a more principled way to address
this. Other fused compare and branch patterns avoid the issue by
emitting the comparison when handling the branch, so that no
instructions may be inserted in between. In this case, the
with.overflow call is emitted separately (and I don't think this
is avoidable, as it will generally have at least two users).

Fixes https://bugs.llvm.org/show_bug.cgi?id=49587.

Differential Revision: https://reviews.llvm.org/D98600

(cherry picked from commit 7669455df49e6fc8ae7d9f4bd4ee95bb20e7eb6e)
---
 llvm/lib/Target/X86/X86FastISel.cpp | 8 ++++++++
 llvm/test/CodeGen/X86/pr49587.ll    | 5 +++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index caf158102230..a1a16a19f5e5 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -284,6 +284,14 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
       return false;
   }
 
+  // Make sure no potentially eflags clobbering phi moves can be inserted in
+  // between.
+  auto HasPhis = [](const BasicBlock *Succ) {
+    return !llvm::empty(Succ->phis());
+  };
+  if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
+    return false;
+
   CC = TmpCC;
   return true;
 }
diff --git a/llvm/test/CodeGen/X86/pr49587.ll b/llvm/test/CodeGen/X86/pr49587.ll
index 343f1a0149c0..7dc54a526608 100644
--- a/llvm/test/CodeGen/X86/pr49587.ll
+++ b/llvm/test/CodeGen/X86/pr49587.ll
@@ -5,10 +5,11 @@ define i32 @test(i64 %arg) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    subq $1, %rdi
-; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    setb %cl
 ; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb $1, %cl
 ; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT:    jb .LBB0_2
+; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %no_overflow
 ; CHECK-NEXT:    movl $1, %eax
 ; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill

From 5b3480610383ba281ef0c7918a6c097058a408d4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 12 Mar 2021 14:15:27 -0500
Subject: [PATCH 200/244] [InstCombine] add test for zext-of-icmps; NFC

PR49475 shows an infinite loop outcome, but this
tries to show the root cause with a minimal test.

(cherry picked from commit 579b8fc2e97c489308f97b01d13d894c03c0a16c)
---
 .../Transforms/InstCombine/zext-or-icmp.ll    | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index a77aa7ac7ebd..54ae0858aa67 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -106,3 +106,23 @@ block2:
   %conv2 = zext i1 %cmp1 to i32
   ret i32 %conv2
 }
+
+; FIXME: This should not end with more instructions than it started from.
+
+define i32 @PR49475(i32 %x, i16 %y) {
+; CHECK-LABEL: @PR49475(
+; CHECK-NEXT:    [[M:%.*]] = and i16 [[Y:%.*]], 1
+; CHECK-NEXT:    [[B1:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[B11:%.*]] = zext i1 [[B1]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[M]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT:    [[Z3:%.*]] = or i32 [[B11]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[Z3]]
+;
+  %m = and i16 %y, 1
+  %b1 = icmp eq i32 %x, 0
+  %b2 = icmp eq i16 %m, 0
+  %t1 = or i1 %b1, %b2
+  %z = zext i1 %t1 to i32
+  ret i32 %z
+}

From ff2cf8fafa5ad9a76e59fa086d969d4e2ecc3a39 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 13 Mar 2021 08:26:27 -0500
Subject: [PATCH 201/244] [InstCombine] avoid creating an extra instruction in
 zext fold and possible inf-loop

The structure of this fold is suspect vs. most of instcombine
because it creates instructions and tries to delete them
immediately after.

If the two icmps don't have the same operand type, then we are
not behaving as assumed. And as shown in PR49475, we can inf-loop.

(cherry picked from commit 4224a36957420744756d6a6450eb6502a1bfadc3)
---
 .../InstCombine/InstCombineCasts.cpp          |  1 +
 .../Transforms/InstCombine/zext-or-icmp.ll    | 58 +++++++++++++++++--
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 0b53007bb6dc..07e68c44416d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1270,6 +1270,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
     ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
     ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
     if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+        LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
         (transformZExtICmp(LHS, CI, false) ||
          transformZExtICmp(RHS, CI, false))) {
       // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index 54ae0858aa67..5ae3d8ea0dba 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -107,17 +107,16 @@ block2:
   ret i32 %conv2
 }
 
-; FIXME: This should not end with more instructions than it started from.
+; This should not end with more instructions than it started from.
 
 define i32 @PR49475(i32 %x, i16 %y) {
 ; CHECK-LABEL: @PR49475(
 ; CHECK-NEXT:    [[M:%.*]] = and i16 [[Y:%.*]], 1
 ; CHECK-NEXT:    [[B1:%.*]] = icmp eq i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[B11:%.*]] = zext i1 [[B1]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[M]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
-; CHECK-NEXT:    [[Z3:%.*]] = or i32 [[B11]], [[TMP2]]
-; CHECK-NEXT:    ret i32 [[Z3]]
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i16 [[M]], 0
+; CHECK-NEXT:    [[T1:%.*]] = or i1 [[B1]], [[B2]]
+; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[T1]] to i32
+; CHECK-NEXT:    ret i32 [[Z]]
 ;
   %m = and i16 %y, 1
   %b1 = icmp eq i32 %x, 0
@@ -126,3 +125,50 @@ define i32 @PR49475(i32 %x, i16 %y) {
   %z = zext i1 %t1 to i32
   ret i32 %z
 }
+
+; This would infinite-loop.
+
+define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) {
+; CHECK-LABEL: @PR49475_infloop(
+; CHECK-NEXT:    [[B:%.*]] = icmp eq i32 [[T0:%.*]], 0
+; CHECK-NEXT:    [[B2:%.*]] = icmp eq i16 [[INSERT:%.*]], 0
+; CHECK-NEXT:    [[T1:%.*]] = or i1 [[B]], [[B2]]
+; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[T1]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], [[T0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[AND]], 140
+; CHECK-NEXT:    [[XOR1:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[CONV16:%.*]] = sext i8 [[I162:%.*]] to i64
+; CHECK-NEXT:    [[SUB17:%.*]] = sub i64 [[CONV16]], [[E:%.*]]
+; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[SUB17]], 32
+; CHECK-NEXT:    [[CONV18:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i64 [[CONV18]], [[XOR1]]
+; CHECK-NEXT:    [[CONV19:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT:    [[OR21:%.*]] = or i16 [[CONV19]], [[INSERT]]
+; CHECK-NEXT:    [[TRUNC44:%.*]] = trunc i16 [[OR21]] to i8
+; CHECK-NEXT:    [[INC:%.*]] = or i8 [[TRUNC44]], [[I162]]
+; CHECK-NEXT:    [[TOBOOL23_NOT:%.*]] = icmp eq i16 [[OR21]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TOBOOL23_NOT]])
+; CHECK-NEXT:    ret i8 [[INC]]
+;
+  %b = icmp eq i32 %t0, 0
+  %b2 = icmp eq i16 %insert, 0
+  %t1 = or i1 %b, %b2
+  %ext = zext i1 %t1 to i32
+  %and = and i32 %t0, %ext
+  %conv13 = zext i32 %and to i64
+  %xor = xor i64 %conv13, 140
+  %conv16 = sext i8 %i162 to i64
+  %sub17 = sub i64 %conv16, %e
+  %sext = shl i64 %sub17, 32
+  %conv18 = ashr exact i64 %sext, 32
+  %cmp = icmp sge i64 %xor, %conv18
+  %conv19 = zext i1 %cmp to i16
+  %or21 = or i16 %insert, %conv19
+  %trunc44 = trunc i16 %or21 to i8
+  %inc = add i8 %i162, %trunc44
+  %tobool23.not = icmp eq i16 %or21, 0
+  call void @llvm.assume(i1 %tobool23.not)
+  ret i8 %inc
+}
+
+declare void @llvm.assume(i1 noundef)

From 9ae9ab1ca34384e07b751c16645e22a0b953b08b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung@sifive.com>
Date: Tue, 30 Mar 2021 14:30:15 -0700
Subject: [PATCH 202/244] [RISCV][MC] Fix nf encoding for vector ld/st whole
 register

The three-bit nf field encodes one less than the number of fields (NFIELDS),
so we manually subtract 1 for VS1/2/4/8R & VL1/2/4/8R.

Differential revision: https://reviews.llvm.org/D98185

(cherry picked from commit rG5cdb2e98608bf57c216ee7067e8a12d070c9e2bd)
---
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 16 +++---
 llvm/test/MC/RISCV/rvv/aliases.s         | 16 +++---
 llvm/test/MC/RISCV/rvv/load.s            | 64 ++++++++++++------------
 llvm/test/MC/RISCV/rvv/store.s           | 16 +++---
 4 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 86fbc73d81d5..b3fc76aee161 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -504,19 +504,19 @@ def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
 def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
 def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
 
-defm VL1R : VWholeLoad<1, "vl1r">;
-defm VL2R : VWholeLoad<2, "vl2r">;
-defm VL4R : VWholeLoad<4, "vl4r">;
-defm VL8R : VWholeLoad<8, "vl8r">;
+defm VL1R : VWholeLoad<0, "vl1r">;
+defm VL2R : VWholeLoad<1, "vl2r">;
+defm VL4R : VWholeLoad<3, "vl4r">;
+defm VL8R : VWholeLoad<7, "vl8r">;
 def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VR:$vd, GPR:$rs1)>;
 def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VR:$vd, GPR:$rs1)>;
 
-def VS1R_V : VWholeStore<1, "vs1r.v">;
-def VS2R_V : VWholeStore<2, "vs2r.v">;
-def VS4R_V : VWholeStore<4, "vs4r.v">;
-def VS8R_V : VWholeStore<8, "vs8r.v">;
+def VS1R_V : VWholeStore<0, "vs1r.v">;
+def VS2R_V : VWholeStore<1, "vs2r.v">;
+def VS4R_V : VWholeStore<3, "vs4r.v">;
+def VS8R_V : VWholeStore<7, "vs8r.v">;
 
 // Vector Single-Width Integer Add and Subtract
 defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
diff --git a/llvm/test/MC/RISCV/rvv/aliases.s b/llvm/test/MC/RISCV/rvv/aliases.s
index 2e5120c91e45..ebe9e79399a6 100644
--- a/llvm/test/MC/RISCV/rvv/aliases.s
+++ b/llvm/test/MC/RISCV/rvv/aliases.s
@@ -54,17 +54,17 @@ vmset.m v0
 # ALIAS:    vmnot.m v0, v1                  # encoding: [0x57,0xa0,0x10,0x76]
 # NO-ALIAS: vmnand.mm       v0, v1, v1      # encoding: [0x57,0xa0,0x10,0x76]
 vmnot.m v0, v1
-# ALIAS:    vl1r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
-# NO-ALIAS: vl1re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
+# ALIAS:    vl1r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
+# NO-ALIAS: vl1re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
 vl1r.v v0, (a0) 
-# ALIAS:    vl2r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x42]
-# NO-ALIAS: vl2re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x42]
+# ALIAS:    vl2r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
+# NO-ALIAS: vl2re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x22]
 vl2r.v v0, (a0) 
-# ALIAS:    vl4r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x82]
-# NO-ALIAS: vl4re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x82]
+# ALIAS:    vl4r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x62]
+# NO-ALIAS: vl4re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x62]
 vl4r.v v0, (a0) 
-# ALIAS:    vl8r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
-# NO-ALIAS: vl8re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0x02]
+# ALIAS:    vl8r.v          v0, (a0)        # encoding: [0x07,0x00,0x85,0xe2]
+# NO-ALIAS: vl8re8.v        v0, (a0)        # encoding: [0x07,0x00,0x85,0xe2]
 vl8r.v v0, (a0) 
 # ALIAS:    vneg.v          v2, v1, v0.t    # encoding: [0x57,0x41,0x10,0x0c]
 # NO-ALIAS: vrsub.vx        v2, v1, zero, v0.t # encoding: [0x57,0x41,0x10,0x0c]
diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s
index 3d0dbb15c36e..45a3881cb60d 100644
--- a/llvm/test/MC/RISCV/rvv/load.s
+++ b/llvm/test/MC/RISCV/rvv/load.s
@@ -256,96 +256,96 @@ vloxei64.v v8, (a0), v4
 
 vl1re8.v v8, (a0)
 # CHECK-INST: vl1re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x04,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 22 <unknown>
+# CHECK-UNKNOWN: 07 04 85 02 <unknown>
 
 vl1re16.v v8, (a0)
 # CHECK-INST: vl1re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x54,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 22 <unknown>
+# CHECK-UNKNOWN: 07 54 85 02 <unknown>
 
 vl1re32.v v8, (a0)
 # CHECK-INST: vl1re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x64,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 22 <unknown>
+# CHECK-UNKNOWN: 07 64 85 02 <unknown>
 
 vl1re64.v v8, (a0)
 # CHECK-INST: vl1re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x22]
+# CHECK-ENCODING: [0x07,0x74,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 22 <unknown>
+# CHECK-UNKNOWN: 07 74 85 02 <unknown>
 
 vl2re8.v v8, (a0)
 # CHECK-INST: vl2re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x04,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 42 <unknown>
+# CHECK-UNKNOWN: 07 04 85 22 <unknown>
 
 vl2re16.v v8, (a0)
 # CHECK-INST: vl2re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x54,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 42 <unknown>
+# CHECK-UNKNOWN: 07 54 85 22 <unknown>
 
 vl2re32.v v8, (a0)
 # CHECK-INST: vl2re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x64,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 42 <unknown>
+# CHECK-UNKNOWN: 07 64 85 22 <unknown>
 
 vl2re64.v v8, (a0)
 # CHECK-INST: vl2re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x42]
+# CHECK-ENCODING: [0x07,0x74,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 42 <unknown>
+# CHECK-UNKNOWN: 07 74 85 22 <unknown>
 
 vl4re8.v v8, (a0)
 # CHECK-INST: vl4re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x04,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 82 <unknown>
+# CHECK-UNKNOWN: 07 04 85 62 <unknown>
 
 vl4re16.v v8, (a0)
 # CHECK-INST: vl4re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x54,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 82 <unknown>
+# CHECK-UNKNOWN: 07 54 85 62 <unknown>
 
 vl4re32.v v8, (a0)
 # CHECK-INST: vl4re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x64,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 82 <unknown>
+# CHECK-UNKNOWN: 07 64 85 62 <unknown>
 
 vl4re64.v v8, (a0)
 # CHECK-INST: vl4re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x82]
+# CHECK-ENCODING: [0x07,0x74,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 82 <unknown>
+# CHECK-UNKNOWN: 07 74 85 62 <unknown>
 
 vl8re8.v v8, (a0)
 # CHECK-INST: vl8re8.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x04,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x04,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 04 85 02 <unknown>
+# CHECK-UNKNOWN: 07 04 85 e2 <unknown>
 
 vl8re16.v v8, (a0)
 # CHECK-INST: vl8re16.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x54,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x54,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 54 85 02 <unknown>
+# CHECK-UNKNOWN: 07 54 85 e2 <unknown>
 
 vl8re32.v v8, (a0)
 # CHECK-INST: vl8re32.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x64,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x64,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 64 85 02 <unknown>
+# CHECK-UNKNOWN: 07 64 85 e2 <unknown>
 
 vl8re64.v v8, (a0)
 # CHECK-INST: vl8re64.v v8, (a0)
-# CHECK-ENCODING: [0x07,0x74,0x85,0x02]
+# CHECK-ENCODING: [0x07,0x74,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 07 74 85 02 <unknown>
+# CHECK-UNKNOWN: 07 74 85 e2 <unknown>
diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s
index e4795aa1c2c9..b5a75ac2d008 100644
--- a/llvm/test/MC/RISCV/rvv/store.s
+++ b/llvm/test/MC/RISCV/rvv/store.s
@@ -208,24 +208,24 @@ vsoxei64.v v24, (a0), v4
 
 vs1r.v v24, (a0)
 # CHECK-INST: vs1r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x22]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0x02]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 22 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 02 <unknown>
 
 vs2r.v v24, (a0)
 # CHECK-INST: vs2r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x42]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0x22]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 42 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 22 <unknown>
 
 vs4r.v v24, (a0)
 # CHECK-INST: vs4r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x82]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0x62]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 82 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 62 <unknown>
 
 vs8r.v v24, (a0)
 # CHECK-INST: vs8r.v v24, (a0)
-# CHECK-ENCODING: [0x27,0x0c,0x85,0x02]
+# CHECK-ENCODING: [0x27,0x0c,0x85,0xe2]
 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions)
-# CHECK-UNKNOWN: 27 0c 85 02 <unknown>
+# CHECK-UNKNOWN: 27 0c 85 e2 <unknown>

From 31001be371e8f2c74470e727e54503fb2aabec8b Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Wed, 17 Mar 2021 16:59:55 +0100
Subject: [PATCH 203/244] [LoopVectorize] Refine hasIrregularType predicate

The `hasIrregularType` predicate checks whether an array of N values of type Ty is "bitcast-compatible" with a <N x Ty> vector.
The previous check returned invalid results in some cases where there's some padding between the array elements: e.g. a 4-element array of i7 values is considered as compatible with <4 x i7>, even though the vector is only loading/storing 28 bits instead of 32.

The problem causes LLVM to generate incorrect code for some targets: for AArch64 the vector loads/stores are lowered in terms of ubfx/bfi, effectively losing the top (N * padding) bits.

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D97465

(cherry picked from commit 4f024938e4c932feba4d28573ec4522106f8d879)
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 22 +++++----------
 .../LoopVectorize/irregular_type.ll           | 27 +++++++++++++++++++
 2 files changed, 34 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/irregular_type.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d36e078444bc..b456a97aa4ec 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) {
 
 /// A helper function that returns true if the given type is irregular. The
 /// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type at the given vectorization factor.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
-  // Determine if an array of VF elements of type Ty is "bitcast compatible"
-  // with a <VF x Ty> vector.
-  if (VF.isVector()) {
-    auto *VectorTy = VectorType::get(Ty, VF);
-    return TypeSize::get(VF.getKnownMinValue() *
-                             DL.getTypeAllocSize(Ty).getFixedValue(),
-                         VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
-  }
-
-  // If the vectorization factor is one, we just check if an array of type Ty
-  // requires padding between elements.
+/// element of the corresponding vector type.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+  // Determine if an array of N elements of type Ty is "bitcast compatible"
+  // with a <N x Ty> vector.
+  // This is only true if there is no padding between the array elements.
   return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
 }
 
@@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = getMemInstValueType(I);
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   // Check if masking is required.
@@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   return true;
diff --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
new file mode 100644
index 000000000000..167a1a101e6f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll
@@ -0,0 +1,27 @@
+; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+; Ensure the array loads/stores are not optimized into vector operations when
+; the element type has padding bits.
+
+; CHECK: foo
+; CHECK: vector.body
+; CHECK-NOT: load <4 x i7>
+; CHECK-NOT: store <4 x i7>
+; CHECK: for.body
+define void @foo(i7* %a, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv
+  %0 = load i7, i7* %arrayidx, align 1
+  %sub = add nuw nsw i7 %0, 0
+  store i7 %sub, i7* %arrayidx, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cmp, label %for.exit, label %for.body
+
+for.exit:
+  ret void
+}

From 04ba60cfe598e41084fb848daae47e0ed910fa7d Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Sun, 28 Mar 2021 16:30:47 -0700
Subject: [PATCH 204/244] [ORC][C-bindings] Fix some ORC C bindings function
 names and signatures.

LLVMOrcDisposeObjectLayer and LLVMOrcExecutionSessionGetJITDylibByName did not
have matching signatures between the C-API header and binding implementations.
Fixes http://llvm.org/PR49745.

Patch by Mats Larsen. Thanks Mats!

Reviewed by: lhames

Differential Revision: https://reviews.llvm.org/D99478

(cherry picked from commit 666df2e2cbe9fc252d3b2d6cbb214c2c2f6afc65)
---
 llvm/include/llvm-c/Orc.h                       | 7 ++++---
 llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h
index 183107c148a6..9beef44c89dd 100644
--- a/llvm/include/llvm-c/Orc.h
+++ b/llvm/include/llvm-c/Orc.h
@@ -339,8 +339,7 @@ LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT);
  * ownership has not been passed to a JITDylib (e.g. because some error
  * prevented the client from calling LLVMOrcJITDylibAddGenerator).
  */
-void LLVMOrcDisposeDefinitionGenerator(
-    LLVMOrcDefinitionGeneratorRef DG);
+void LLVMOrcDisposeDefinitionGenerator(LLVMOrcDefinitionGeneratorRef DG);
 
 /**
  * Dispose of a MaterializationUnit.
@@ -388,7 +387,9 @@ LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES,
  * Returns the JITDylib with the given name, or NULL if no such JITDylib
  * exists.
  */
-LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name);
+LLVMOrcJITDylibRef
+LLVMOrcExecutionSessionGetJITDylibByName(LLVMOrcExecutionSessionRef ES,
+                                         const char *Name);
 
 /**
  * Return a reference to a newly created resource tracker associated with JD.
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index dfdd2c6c669f..834d4cc8f514 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -393,7 +393,7 @@ void LLVMOrcDisposeJITTargetMachineBuilder(
   delete unwrap(JTMB);
 }
 
-void lLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
+void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer) {
   delete unwrap(ObjLayer);
 }
 

From d28af7c654d8db0b68c175db5ce212d74fb5e9bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= <l.lunak@centrum.cz>
Date: Tue, 6 Apr 2021 18:38:18 +0200
Subject: [PATCH 205/244] remove -fpch-codegen and -fpch-debuginfo from Clang
 12.0 release notes

These were new in 11.0. The commit adding the options landed after the
11.x branch had already been branched off from master, and was only
then backported to 11.x, so the release notes change stayed for 12.0.
---
 clang/docs/ReleaseNotes.rst | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f3499d167361..4cc1b0b9d2cf 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -82,31 +82,6 @@ New Compiler Flags
   in that case. The option's behaviour mirrors GCC, the helpers are implemented
   both in compiler-rt and libgcc.
 
-- -fpch-codegen and -fpch-debuginfo generate shared code and/or debuginfo
-  for contents of a precompiled header in a separate object file. This object
-  file needs to be linked in, but its contents do not need to be generated
-  for other objects using the precompiled header. This should usually save
-  compile time. If not using clang-cl, the separate object file needs to
-  be created explicitly from the precompiled header.
-  Example of use:
-
-  .. code-block:: console
-
-    $ clang++ -x c++-header header.h -o header.pch -fpch-codegen -fpch-debuginfo
-    $ clang++ -c header.pch -o shared.o
-    $ clang++ -c source.cpp -o source.o -include-pch header.pch
-    $ clang++ -o binary source.o shared.o
-
-  - Using -fpch-instantiate-templates when generating the precompiled header
-    usually increases the amount of code/debuginfo that can be shared.
-  - In some cases, especially when building with optimizations enabled, using
-    -fpch-codegen may generate so much code in the shared object that compiling
-    it may be a net loss in build time.
-  - Since headers may bring in private symbols of other libraries, it may be
-    sometimes necessary to discard unused symbols (such as by adding
-    -Wl,--gc-sections on ELF platforms to the linking command, and possibly
-    adding -fdata-sections -ffunction-sections to the command generating
-    the shared object).
 - New option ``-fbinutils-version=`` specifies the targeted binutils version.
   For example, ``-fbinutils-version=2.35`` means compatibility with GNU as/ld
   before 2.35 is not needed: new features can be used and there is no need to

From 52256996c09c4e9c81f9dd1b2ee3dc7c411453e7 Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Tue, 12 Mar 2019 14:35:12 +0100
Subject: [PATCH 206/244] ScalarEvolution: mark destructor virtual

---
 llvm/include/llvm/Analysis/ScalarEvolution.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index b3f199de2cfa..01ff82801b3a 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -209,7 +209,7 @@ class SCEVPredicate : public FoldingSetNode {
 
 protected:
   SCEVPredicateKind Kind;
-  ~SCEVPredicate() = default;
+  virtual ~SCEVPredicate() = default;
   SCEVPredicate(const SCEVPredicate &) = default;
   SCEVPredicate &operator=(const SCEVPredicate &) = default;
 

From 98ed293c8f3d49bbf9bbc7ee872939e22124acba Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Wed, 11 Nov 2020 15:22:13 +0100
Subject: [PATCH 207/244] IRBuilderCallbackInserter: override destructor

The destructor was virtual but not marked override, causing warnings
(and errors in revng if -Wall was enabled).
This commit fixes these problems.
---
 llvm/include/llvm/IR/IRBuilder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 9cefc9aa764c..77a8aad1ee17 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -78,7 +78,7 @@ class IRBuilderCallbackInserter : public IRBuilderDefaultInserter {
   std::function<void(Instruction *)> Callback;
 
 public:
-  virtual ~IRBuilderCallbackInserter();
+  virtual ~IRBuilderCallbackInserter() override;
 
   IRBuilderCallbackInserter(std::function<void(Instruction *)> Callback)
       : Callback(std::move(Callback)) {}

From 86d64d5e52f6799485ce984fd1e44281ab3acb0b Mon Sep 17 00:00:00 2001
From: Massimo Fioravanti <massimofioravanti@rev.ng>
Date: Fri, 24 Jan 2020 14:59:32 +0100
Subject: [PATCH 208/244] Add virtual destructor to `DWARFContext`

Polymorphic classes (those with virtual member functions) should always have a virtual destructor.
---
 llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 7d88e1447dca..61c2657cd46e 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -122,7 +122,7 @@ class DWARFContext : public DIContext {
                    WithColor::defaultErrorHandler,
                std::function<void(Error)> WarningHandler =
                    WithColor::defaultWarningHandler);
-  ~DWARFContext();
+  virtual ~DWARFContext() override;
 
   DWARFContext(DWARFContext &) = delete;
   DWARFContext &operator=(DWARFContext &) = delete;

From 15245c3b2b2ebdca3ec5459c36daadfd71fc729b Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Tue, 29 Dec 2020 20:26:29 +0100
Subject: [PATCH 209/244] Disable failing Mach-O test

---
 llvm/test/tools/llvm-objdump/MachO/bad-bind.test | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/test/tools/llvm-objdump/MachO/bad-bind.test b/llvm/test/tools/llvm-objdump/MachO/bad-bind.test
index d11c4fb88395..f8d1ac6d8d22 100644
--- a/llvm/test/tools/llvm-objdump/MachO/bad-bind.test
+++ b/llvm/test/tools/llvm-objdump/MachO/bad-bind.test
@@ -67,9 +67,6 @@ WEAK-BIND-SET-DYLIB-ORDINAL-ULEB: macho-weak-bind-set-dylib-ordinal-uleb': trunc
 RUN: not llvm-objdump --macho --weak-bind %p/Inputs/macho-weak-bind-set-dylib-special-imm 2>&1 | FileCheck --check-prefix WEAK-BIND-SET-DYLIB-SPECIAL-IMM %s
 WEAK-BIND-SET-DYLIB-SPECIAL-IMM: macho-weak-bind-set-dylib-special-imm': truncated or malformed object (BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in weak bind table for opcode at: 0x2)
 
-RUN: not llvm-objdump --macho --rebase %p/Inputs/macho-rebase-set-type-imm 2>&1 | FileCheck --check-prefix REBASE-SET-TYPE-IMM %s
-REBASE-SET-TYPE-IMM: macho-rebase-set-type-imm': truncated or malformed object (for REBASE_OPCODE_SET_TYPE_IMM bad bind type: 5 for opcode at: 0x0)
-
 RUN: not llvm-objdump --macho --rebase %p/Inputs/macho-rebase-uleb-malformed-uleb128 2>&1 | FileCheck --check-prefix REBASE-ULEB-MALFORMED-ULEB128 %s
 REBASE-ULEB-MALFORMED-ULEB128: macho-rebase-uleb-malformed-uleb128': truncated or malformed object (for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB malformed uleb128, extends past end for opcode at: 0x1)
 

From 550ccf8086aff0059764fba228fa1f13f68bdd14 Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Tue, 25 May 2021 12:33:50 +0200
Subject: [PATCH 210/244] Fix ambiguous operator!= on enums in elf2yaml.cpp

---
 llvm/tools/obj2yaml/elf2yaml.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index c85e2653b655..c0625be1131e 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -200,7 +200,7 @@ bool ELFDumper<ELFT>::shouldPrintSection(const ELFYAML::Section &S,
     if (const ELFYAML::RawContentSection *RawSec =
             dyn_cast<const ELFYAML::RawContentSection>(&S)) {
       if (RawSec->Type != ELF::SHT_PROGBITS || RawSec->Link || RawSec->Info ||
-          RawSec->AddressAlign != 1 || RawSec->Address || RawSec->EntSize)
+          (RawSec->AddressAlign != (yaml::Hex64)1) || RawSec->Address || RawSec->EntSize)
         return true;
 
       ELFYAML::ELF_SHF ShFlags = RawSec->Flags.getValueOr(ELFYAML::ELF_SHF(0));
@@ -208,7 +208,7 @@ bool ELFDumper<ELFT>::shouldPrintSection(const ELFYAML::Section &S,
       if (SecName == "debug_str")
         return ShFlags != ELFYAML::ELF_SHF(ELF::SHF_MERGE | ELF::SHF_STRINGS);
 
-      return ShFlags != 0;
+      return ShFlags != ELFYAML::ELF_SHF(0);
     }
   }
 

From 5d62970b68ca324bd1e8b319056231dda2d8a8d5 Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Tue, 25 May 2021 12:40:33 +0200
Subject: [PATCH 211/244] APFixedPointTest.cpp: fix C++20 compilation error

In file included from /orchestra/sources/llvm/llvm/unittests/ADT/APFixedPointTest.cpp:12:
/orchestra/sources/llvm/llvm/utils/unittest/googletest/include/gtest/gtest.h:1392:11: error: use of overloaded operator '==' is ambiguous (with operand types 'const llvm::APSInt' and 'const unsigned long')
  if (lhs == rhs) {
      ~~~ ^  ~~~
/orchestra/sources/llvm/llvm/utils/unittest/googletest/include/gtest/gtest.h:1421:12: note: in instantiation of function template specialization 'testing::internal::CmpHelperEQ<llvm::APSInt, unsigned long>' requested here
    return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
           ^
/orchestra/sources/llvm/llvm/unittests/ADT/APFixedPointTest.cpp:227:3: note: in instantiation of function template specialization 'testing::internal::EqHelper<false>::Compare<llvm::APSInt, unsigned long>' requested here
  ASSERT_EQ(APFixedPoint::getMax(Sema).getIntPart(), Expected);
  ^
/orchestra/sources/llvm/llvm/utils/unittest/googletest/include/gtest/gtest.h:1956:32: note: expanded from macro 'ASSERT_EQ'
                               ^
/orchestra/sources/llvm/llvm/utils/unittest/googletest/include/gtest/gtest.h:1939:63: note: expanded from macro 'GTEST_ASSERT_EQ'
                      EqHelper<GTEST_IS_NULL_LITERAL_(val1)>::Compare, \
                                                              ^
/orchestra/sources/llvm/llvm/include/llvm/ADT/APInt.h:2035:13: note: candidate function (with reversed parameter order)
inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
            ^
/orchestra/sources/llvm/llvm/include/llvm/ADT/APSInt.h:176:8: note: candidate function
  bool operator==(int64_t RHS) const {
       ^
/orchestra/sources/llvm/llvm/include/llvm/ADT/APSInt.h:339:13: note: candidate function (with reversed parameter order)
inline bool operator==(int64_t V1, const APSInt &V2) { return V2 == V1; }
            ^
1 error generated.
---
 llvm/unittests/ADT/APFixedPointTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/unittests/ADT/APFixedPointTest.cpp b/llvm/unittests/ADT/APFixedPointTest.cpp
index 53fa1cd8b503..8891889c62ee 100644
--- a/llvm/unittests/ADT/APFixedPointTest.cpp
+++ b/llvm/unittests/ADT/APFixedPointTest.cpp
@@ -223,7 +223,7 @@ void CheckIntPartMin(const FixedPointSemantics &Sema, int64_t Expected) {
   ASSERT_EQ(APFixedPoint::getMin(Sema).getIntPart(), Expected);
 }
 
-void CheckIntPartMax(const FixedPointSemantics &Sema, uint64_t Expected) {
+void CheckIntPartMax(const FixedPointSemantics &Sema, int64_t Expected) {
   ASSERT_EQ(APFixedPoint::getMax(Sema).getIntPart(), Expected);
 }
 

From 08de5cd4af1ca58786018c86667e21666c1775ab Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Tue, 25 May 2021 14:26:42 +0200
Subject: [PATCH 212/244] Fix C++20 UTF8 string literals

Since C++20, UTF8 string literals (in the form `u8"blabla"`) have type
`const char8_t[N]`, which does not play nice with llvm::StringRef and
llvm::StringLiteral.

This commit fixes compilation of DJBTest.cpp and JSONTest.cpp in C++20.
In JSONTest.cpp, the UTF8 string literals are explicitly converted to
regular string literals.

To fix compilation of DJBTest.cpp, we had to define a new UTF8 based
constructor for StringRef and one for StringLiteral.
These cannot be constexpr because const char8_t * cannot be converted at
compile time to const char *.

With these new constructors, the StringRef(data, length) constructor
becomes ambiguous if used with nullptr as first argument, such as
  StringRef(nullptr, 0);
This was done in XCOFFObjectFile.cpp, which has been changed to use the
default StringRef constructor instead.
---
 llvm/include/llvm/ADT/StringRef.h       | 41 ++++++++++++++++++++++++-
 llvm/include/llvm/Support/raw_ostream.h |  9 ++++++
 llvm/lib/Object/XCOFFObjectFile.cpp     |  2 +-
 llvm/unittests/Support/DJBTest.cpp      |  2 +-
 llvm/unittests/Support/JSONTest.cpp     |  8 ++---
 5 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 98c120fe2d2e..1f44aaff7b4f 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -76,6 +76,7 @@ namespace llvm {
       return ::memcmp(Lhs,Rhs,Length);
     }
 
+  protected:
     // Constexpr version of std::strlen.
     static constexpr size_t strLen(const char *Str) {
 #if __cplusplus > 201402L
@@ -91,6 +92,13 @@ namespace llvm {
 #endif
     }
 
+#if __cpp_char8_t
+    // Constexpr version of std::strlen for char8_t
+    static constexpr size_t strLen(const char8_t *Str) {
+      return std::char_traits<char8_t>::length(Str);
+    }
+#endif
+
   public:
     /// @name Constructors
     /// @{
@@ -106,10 +114,22 @@ namespace llvm {
     /*implicit*/ constexpr StringRef(const char *Str)
         : Data(Str), Length(Str ? strLen(Str) : 0) {}
 
+#if __cpp_char8_t
+    /// Construct a string ref from a UTF-8 (char8_t) cstring.
+    /*implicit*/ StringRef(const char8_t *Str)
+        : StringRef(reinterpret_cast<const char *>(Str)) {}
+#endif
+
     /// Construct a string ref from a pointer and length.
     /*implicit*/ constexpr StringRef(const char *data, size_t length)
         : Data(data), Length(length) {}
 
+#if __cpp_char8_t
+    /// Construct a string ref from a UTF-8 (char8_t) pointer and length.
+    /*implicit*/ StringRef(const char8_t *data, size_t length)
+        : StringRef(reinterpret_cast<const char *>(data), length) {}
+#endif
+
     /// Construct a string ref from an std::string.
     /*implicit*/ StringRef(const std::string &Str)
       : Data(Str.data()), Length(Str.length()) {}
@@ -874,19 +894,38 @@ namespace llvm {
     constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
     }
 
+#if __cpp_char8_t
+    StringLiteral(const char8_t *Str, size_t N) : StringRef(Str, N) {
+    }
+#endif
+
   public:
     template <size_t N>
     constexpr StringLiteral(const char (&Str)[N])
 #if defined(__clang__) && __has_attribute(enable_if)
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wgcc-compat"
-        __attribute((enable_if(__builtin_strlen(Str) == N - 1,
+        __attribute((enable_if(strLen(Str) == N - 1,
                                "invalid string literal")))
 #pragma clang diagnostic pop
 #endif
         : StringRef(Str, N - 1) {
     }
 
+#if __cpp_char8_t
+    template <size_t N>
+    StringLiteral(const char8_t (&Str)[N])
+#if defined(__clang__) && __has_attribute(enable_if)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wgcc-compat"
+        __attribute((enable_if(strLen(Str) == N - 1,
+                               "invalid string literal")))
+#pragma clang diagnostic pop
+#endif
+        : StringRef(Str, N - 1) {
+    }
+#endif
+
     // Explicit construction for strings like "foo\0bar".
     template <size_t N>
     static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index 7d572fe06f6f..2ff398d691f7 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -221,6 +221,15 @@ class raw_ostream {
     return this->operator<<(StringRef(Str));
   }
 
+#if __cpp_char8_t
+  raw_ostream &operator<<(const char8_t *Str) {
+    // Inline fast path, particularly for constant strings where a sufficiently
+    // smart compiler will simplify strlen.
+
+    return this->operator<<(StringRef(Str));
+  }
+#endif
+
   raw_ostream &operator<<(const std::string &Str) {
     // Avoid the fast path, it would only increase code size for a marginal win.
     return write(Str.data(), Str.length());
diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp
index a16a458168d4..d719a288981e 100644
--- a/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -166,7 +166,7 @@ XCOFFObjectFile::getStringTableEntry(uint32_t Offset) const {
   // field; as a soft-error recovery mechanism, we treat such cases as having an
   // offset of 0.
   if (Offset < 4)
-    return StringRef(nullptr, 0);
+    return StringRef();
 
   if (StringTable.Data != nullptr && StringTable.Size > Offset)
     return (StringTable.Data + Offset);
diff --git a/llvm/unittests/Support/DJBTest.cpp b/llvm/unittests/Support/DJBTest.cpp
index c01bbe1eaa67..c9d1d9fc5dd2 100644
--- a/llvm/unittests/Support/DJBTest.cpp
+++ b/llvm/unittests/Support/DJBTest.cpp
@@ -18,7 +18,7 @@ TEST(DJBTest, caseFolding) {
     StringLiteral Two;
   };
 
-  static constexpr TestCase Tests[] = {
+  static TestCase Tests[] = {
       {{"ASDF"}, {"asdf"}},
       {{"qWeR"}, {"QwEr"}},
       {{"qqqqqqqqqqqqqqqqqqqq"}, {"QQQQQQQQQQQQQQQQQQQQ"}},
diff --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp
index ed9a72d36b06..2168f170f8f3 100644
--- a/llvm/unittests/Support/JSONTest.cpp
+++ b/llvm/unittests/Support/JSONTest.cpp
@@ -174,12 +174,12 @@ TEST(JSONTest, Parse) {
   Compare(R"("\"\\\b\f\n\r\t")", "\"\\\b\f\n\r\t");
   Compare(R"("\u0000")", llvm::StringRef("\0", 1));
   Compare("\"\x7f\"", "\x7f");
-  Compare(R"("\ud801\udc37")", u8"\U00010437"); // UTF16 surrogate pair escape.
-  Compare("\"\xE2\x82\xAC\xF0\x9D\x84\x9E\"", u8"\u20ac\U0001d11e"); // UTF8
+  Compare(R"("\ud801\udc37")", reinterpret_cast<const char *>(u8"\U00010437")); // UTF16 surrogate pair escape.
+  Compare("\"\xE2\x82\xAC\xF0\x9D\x84\x9E\"", reinterpret_cast<const char *>(u8"\u20ac\U0001d11e")); // UTF8
   Compare(
       R"("LoneLeading=\ud801, LoneTrailing=\udc01, LeadingLeadingTrailing=\ud801\ud801\udc37")",
-      u8"LoneLeading=\ufffd, LoneTrailing=\ufffd, "
-      u8"LeadingLeadingTrailing=\ufffd\U00010437"); // Invalid unicode.
+      reinterpret_cast<const char *>(u8"LoneLeading=\ufffd, LoneTrailing=\ufffd, "
+                                     u8"LeadingLeadingTrailing=\ufffd\U00010437")); // Invalid unicode.
 
   Compare(R"({"":0,"":0})", Object{{"", 0}});
   Compare(R"({"obj":{},"arr":[]})", Object{{"obj", Object{}}, {"arr", {}}});

From 046ace35caf4b897dbbcb440821eccb509f1dd6f Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Thu, 4 Mar 2021 17:16:03 +0100
Subject: [PATCH 213/244] Fix warnings on deprecated-enum-enum-conversion

The exact warning was:
include/clang/AST/DeclarationName.h:210:52: error: arithmetic between different enumeration types ('clang::DeclarationName::StoredNameKind' and 'clang::detail::DeclarationNameExtra::ExtraKind') is deprecated [-Werror,-Wdeprecated-enum-enum-conversion]
---
 clang/include/clang/AST/DeclarationName.h | 28 ++++++++++++++++-------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/AST/DeclarationName.h b/clang/include/clang/AST/DeclarationName.h
index 3cb0a02ff49b..b7334bf3d3a2 100644
--- a/clang/include/clang/AST/DeclarationName.h
+++ b/clang/include/clang/AST/DeclarationName.h
@@ -27,6 +27,7 @@
 #include <cstdint>
 #include <cstring>
 #include <string>
+#include <type_traits>
 
 namespace clang {
 
@@ -194,6 +195,14 @@ class DeclarationName {
                 "The various classes that DeclarationName::Ptr can point to"
                 " must be at least aligned to 8 bytes!");
 
+  static_assert(std::is_same<
+                    std::underlying_type_t<StoredNameKind>,
+                    std::underlying_type_t<detail::DeclarationNameExtra::ExtraKind>
+                >::value,
+                "The various enums used to compute values for NameKind should "
+                "all have the same underlying type"
+                );
+
 public:
   /// The kind of the name stored in this DeclarationName.
   /// The first 7 enumeration values are stored inline and correspond
@@ -207,15 +216,18 @@ class DeclarationName {
     CXXDestructorName = StoredCXXDestructorName,
     CXXConversionFunctionName = StoredCXXConversionFunctionName,
     CXXOperatorName = StoredCXXOperatorName,
-    CXXDeductionGuideName = UncommonNameKindOffset +
-                            detail::DeclarationNameExtra::CXXDeductionGuideName,
+    CXXDeductionGuideName =
+        static_cast<std::underlying_type_t<StoredNameKind>>(UncommonNameKindOffset) +
+        static_cast<std::underlying_type_t<detail::DeclarationNameExtra::ExtraKind>>(detail::DeclarationNameExtra::CXXDeductionGuideName),
     CXXLiteralOperatorName =
-        UncommonNameKindOffset +
-        detail::DeclarationNameExtra::CXXLiteralOperatorName,
-    CXXUsingDirective = UncommonNameKindOffset +
-                        detail::DeclarationNameExtra::CXXUsingDirective,
-    ObjCMultiArgSelector = UncommonNameKindOffset +
-                           detail::DeclarationNameExtra::ObjCMultiArgSelector
+        static_cast<std::underlying_type_t<StoredNameKind>>(UncommonNameKindOffset) +
+        static_cast<std::underlying_type_t<detail::DeclarationNameExtra::ExtraKind>>(detail::DeclarationNameExtra::CXXLiteralOperatorName),
+    CXXUsingDirective =
+        static_cast<std::underlying_type_t<StoredNameKind>>(UncommonNameKindOffset) +
+        static_cast<std::underlying_type_t<detail::DeclarationNameExtra::ExtraKind>>(detail::DeclarationNameExtra::CXXUsingDirective),
+    ObjCMultiArgSelector =
+        static_cast<std::underlying_type_t<StoredNameKind>>(UncommonNameKindOffset) +
+        static_cast<std::underlying_type_t<detail::DeclarationNameExtra::ExtraKind>>(detail::DeclarationNameExtra::ObjCMultiArgSelector),
   };
 
 private:

From 57aca719c6ae411ec793463528c908b5464b9023 Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Tue, 1 Jun 2021 10:22:27 +0200
Subject: [PATCH 214/244] Disable failing tests on file permissions

These tests pass successfully when running as unprivileged user, but
fail if run as `root`, because the superuser has privileges to ignore
file permissions.

This is a known issue, reported e.g. by Gentoo as well:
  https://bugs.gentoo.org/775050

For now, disable these tests, since they fail in docker on our CI.
---
 .../llvm-ar/error-opening-permission.test      | 14 --------------
 .../tools/llvm-elfabi/fail-file-write.test     | 18 ------------------
 2 files changed, 32 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-ar/error-opening-permission.test
 delete mode 100644 llvm/test/tools/llvm-elfabi/fail-file-write.test

diff --git a/llvm/test/tools/llvm-ar/error-opening-permission.test b/llvm/test/tools/llvm-ar/error-opening-permission.test
deleted file mode 100644
index d8d0cbfd3556..000000000000
--- a/llvm/test/tools/llvm-ar/error-opening-permission.test
+++ /dev/null
@@ -1,14 +0,0 @@
-## Unsupported on windows as marking files "unreadable"
-## is non-trivial on windows.
-# UNSUPPORTED: system-windows
-
-# RUN: rm -rf %t && mkdir -p %t
-# RUN: echo file1 > %t/1.txt
-
-## No Permission:
-# RUN: llvm-ar rc %t/permission.b %t/1.txt
-# RUN: chmod 100 %t/permission.b
-# RUN: not llvm-ar p %t/permission.b 2>&1 | \
-# RUN:   FileCheck %s --check-prefix=NO-PERMISSION -DARCHIVE=%t/permission.b
-
-# NO-PERMISSION: error: unable to open '[[ARCHIVE]]': {{.*}}{{[pP]}}ermission denied
diff --git a/llvm/test/tools/llvm-elfabi/fail-file-write.test b/llvm/test/tools/llvm-elfabi/fail-file-write.test
deleted file mode 100644
index c3c479e73887..000000000000
--- a/llvm/test/tools/llvm-elfabi/fail-file-write.test
+++ /dev/null
@@ -1,18 +0,0 @@
-## Test failing to write output file on non-windows platforms.
-
-# UNSUPPORTED: system-windows
-# RUN: rm -rf %t.TestDir
-# RUN: mkdir %t.TestDir
-# RUN: touch %t.TestDir/Output.TestFile
-# RUN: chmod 400 %t.TestDir
-# RUN: not llvm-elfabi %s --output-target=elf64-little %t.TestDir/Output.TestFile 2>&1 | FileCheck %s --check-prefix=ERR
-# RUN: chmod 777 %t.TestDir
-# RUN: rm -rf %t.TestDir
-
---- !tapi-tbe
-TbeVersion: 1.0
-Arch: AArch64
-Symbols: {}
-...
-
-# ERR: {{.*}}Permission denied{{.*}} when trying to open `{{.*}}.TestDir/Output.TestFile` for writing

From 14275ae801b233f05fac7100ce339e79c12cb54a Mon Sep 17 00:00:00 2001
From: Joachim Meyer <joachim@joameyer.de>
Date: Thu, 6 May 2021 22:26:19 +0200
Subject: [PATCH 215/244] [NFC] Correctly assert the indents for
 printEnumValHelpStr.

Only verify that there's no negative indent.
Noted by @chapuni in https://reviews.llvm.org/D93494.

Reviewed By: chapuni

Differential Revision: https://reviews.llvm.org/D102021
---
 llvm/lib/Support/CommandLine.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e2f014d1815b..123a23a5242c 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1729,7 +1729,7 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
 void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
                                  size_t FirstLineIndentedBy) {
   const StringRef ValHelpPrefix = "  ";
-  assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size());
+  assert(BaseIndent >= FirstLineIndentedBy);
   std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
   outs().indent(BaseIndent - FirstLineIndentedBy)
       << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";

From b78ddb59616485dc39b846d451b5ff822353c6ff Mon Sep 17 00:00:00 2001
From: Evgeny Mandrikov <mandrikov@gmail.com>
Date: Thu, 6 Jan 2022 17:01:05 +0100
Subject: [PATCH 216/244] Fix build failure with GCC 11 in C++20 mode

See https://wg21.link/cwg2237

Reviewed By: shafik, dexonsmith

Differential Revision: https://reviews.llvm.org/D115355
---
 llvm/include/llvm/CodeGen/LiveInterval.h      | 2 +-
 llvm/include/llvm/Support/BinaryStreamArray.h | 2 +-
 llvm/lib/Passes/StandardInstrumentations.cpp  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h
index c2b158ac1b7f..2165f1f7d761 100644
--- a/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -724,7 +724,7 @@ namespace llvm {
       T *P;
 
     public:
-      SingleLinkedListIterator<T>(T *P) : P(P) {}
+      SingleLinkedListIterator(T *P) : P(P) {}
 
       SingleLinkedListIterator<T> &operator++() {
         P = P->Next;
diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h
index 3ba65c07cfe2..6c822c159ed5 100644
--- a/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -325,7 +325,7 @@ class FixedStreamArrayIterator
   FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
       : Array(Array), Index(Index) {}
 
-  FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other)
+  FixedStreamArrayIterator(const FixedStreamArrayIterator<T> &Other)
       : Array(Other.Array), Index(Other.Index) {}
   FixedStreamArrayIterator<T> &
   operator=(const FixedStreamArrayIterator<T> &Other) {
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 6795aed7b04e..da6e79b91261 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -252,7 +252,7 @@ bool isIgnored(StringRef PassID) {
 } // namespace
 
 template <typename IRUnitT>
-ChangeReporter<IRUnitT>::~ChangeReporter<IRUnitT>() {
+ChangeReporter<IRUnitT>::~ChangeReporter() {
   assert(BeforeStack.empty() && "Problem with Change Printer stack.");
 }
 

From 00b850ca57647350750c24c9ac0671c7a1f857e4 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard@metafoo.co.uk>
Date: Tue, 23 Feb 2021 14:07:13 -0800
Subject: [PATCH 217/244] Fix constructor declarations that are invalid in
 C++20 onwards.

Under C++ CWG DR 2237, the constructor for a class template C must be
written as 'C(...)' not as 'C<T>(...)'. This fixes a build failure with
GCC in C++20 mode.

In passing, remove some other redundant '<T>' qualification from the
affected classes.
---
 llvm/include/llvm/ADT/STLExtras.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 63c7f48a5bd2..7741edf04959 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1820,9 +1820,9 @@ template <typename R> struct result_pair {
   result_pair(std::size_t Index, IterOfRange<R> Iter)
       : Index(Index), Iter(Iter) {}
 
-  result_pair<R>(const result_pair<R> &Other)
+  result_pair(const result_pair<R> &Other)
       : Index(Other.Index), Iter(Other.Iter) {}
-  result_pair<R> &operator=(const result_pair<R> &Other) {
+  result_pair &operator=(const result_pair &Other) {
     Index = Other.Index;
     Iter = Other.Iter;
     return *this;
@@ -1856,22 +1856,22 @@ class enumerator_iter
   result_type &operator*() { return Result; }
   const result_type &operator*() const { return Result; }
 
-  enumerator_iter<R> &operator++() {
+  enumerator_iter &operator++() {
     assert(Result.Index != std::numeric_limits<size_t>::max());
     ++Result.Iter;
     ++Result.Index;
     return *this;
   }
 
-  bool operator==(const enumerator_iter<R> &RHS) const {
+  bool operator==(const enumerator_iter &RHS) const {
     // Don't compare indices here, only iterators.  It's possible for an end
     // iterator to have different indices depending on whether it was created
     // by calling std::end() versus incrementing a valid iterator.
     return Result.Iter == RHS.Result.Iter;
   }
 
-  enumerator_iter<R>(const enumerator_iter<R> &Other) : Result(Other.Result) {}
-  enumerator_iter<R> &operator=(const enumerator_iter<R> &Other) {
+  enumerator_iter(const enumerator_iter &Other) : Result(Other.Result) {}
+  enumerator_iter &operator=(const enumerator_iter &Other) {
     Result = Other.Result;
     return *this;
   }

From 2777ba16ca6d180b1963179ced04080593b85c50 Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Sun, 13 Mar 2022 12:26:56 +0100
Subject: [PATCH 218/244] Remove cyclades

---
 .../sanitizer_common_interceptors_ioctl.inc           |  9 ---------
 .../sanitizer_platform_limits_posix.cpp               | 11 -----------
 .../sanitizer_platform_limits_posix.h                 | 10 ----------
 3 files changed, 30 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
index 7f181258eab5..b7da65987557 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
@@ -370,15 +370,6 @@ static void ioctl_table_fill() {
 
 #if SANITIZER_GLIBC
   // _(SIOCDEVPLIP, WRITE, struct_ifreq_sz); // the same as EQL_ENSLAVE
-  _(CYGETDEFTHRESH, WRITE, sizeof(int));
-  _(CYGETDEFTIMEOUT, WRITE, sizeof(int));
-  _(CYGETMON, WRITE, struct_cyclades_monitor_sz);
-  _(CYGETTHRESH, WRITE, sizeof(int));
-  _(CYGETTIMEOUT, WRITE, sizeof(int));
-  _(CYSETDEFTHRESH, NONE, 0);
-  _(CYSETDEFTIMEOUT, NONE, 0);
-  _(CYSETTHRESH, NONE, 0);
-  _(CYSETTIMEOUT, NONE, 0);
   _(EQL_EMANCIPATE, WRITE, struct_ifreq_sz);
   _(EQL_ENSLAVE, WRITE, struct_ifreq_sz);
   _(EQL_GETMASTRCFG, WRITE, struct_ifreq_sz);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
index 12dd39e674ac..7abaeb880bf3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -143,7 +143,6 @@ typedef struct user_fpregs elf_fpregset_t;
 # include <sys/procfs.h>
 #endif
 #include <sys/user.h>
-#include <linux/cyclades.h>
 #include <linux/if_eql.h>
 #include <linux/if_plip.h>
 #include <linux/lp.h>
@@ -459,7 +458,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
 
 #if SANITIZER_GLIBC
   unsigned struct_ax25_parms_struct_sz = sizeof(struct ax25_parms_struct);
-  unsigned struct_cyclades_monitor_sz = sizeof(struct cyclades_monitor);
 #if EV_VERSION > (0x010000)
   unsigned struct_input_keymap_entry_sz = sizeof(struct input_keymap_entry);
 #else
@@ -823,15 +821,6 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
 #endif // SANITIZER_LINUX
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
-  unsigned IOCTL_CYGETDEFTHRESH = CYGETDEFTHRESH;
-  unsigned IOCTL_CYGETDEFTIMEOUT = CYGETDEFTIMEOUT;
-  unsigned IOCTL_CYGETMON = CYGETMON;
-  unsigned IOCTL_CYGETTHRESH = CYGETTHRESH;
-  unsigned IOCTL_CYGETTIMEOUT = CYGETTIMEOUT;
-  unsigned IOCTL_CYSETDEFTHRESH = CYSETDEFTHRESH;
-  unsigned IOCTL_CYSETDEFTIMEOUT = CYSETDEFTIMEOUT;
-  unsigned IOCTL_CYSETTHRESH = CYSETTHRESH;
-  unsigned IOCTL_CYSETTIMEOUT = CYSETTIMEOUT;
   unsigned IOCTL_EQL_EMANCIPATE = EQL_EMANCIPATE;
   unsigned IOCTL_EQL_ENSLAVE = EQL_ENSLAVE;
   unsigned IOCTL_EQL_GETMASTRCFG = EQL_GETMASTRCFG;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index 836b178c131b..8a156b7fcb80 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -983,7 +983,6 @@ extern unsigned struct_vt_mode_sz;
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
 extern unsigned struct_ax25_parms_struct_sz;
-extern unsigned struct_cyclades_monitor_sz;
 extern unsigned struct_input_keymap_entry_sz;
 extern unsigned struct_ipx_config_data_sz;
 extern unsigned struct_kbdiacrs_sz;
@@ -1328,15 +1327,6 @@ extern unsigned IOCTL_VT_WAITACTIVE;
 #endif  // SANITIZER_LINUX
 
 #if SANITIZER_LINUX && !SANITIZER_ANDROID
-extern unsigned IOCTL_CYGETDEFTHRESH;
-extern unsigned IOCTL_CYGETDEFTIMEOUT;
-extern unsigned IOCTL_CYGETMON;
-extern unsigned IOCTL_CYGETTHRESH;
-extern unsigned IOCTL_CYGETTIMEOUT;
-extern unsigned IOCTL_CYSETDEFTHRESH;
-extern unsigned IOCTL_CYSETDEFTIMEOUT;
-extern unsigned IOCTL_CYSETTHRESH;
-extern unsigned IOCTL_CYSETTIMEOUT;
 extern unsigned IOCTL_EQL_EMANCIPATE;
 extern unsigned IOCTL_EQL_ENSLAVE;
 extern unsigned IOCTL_EQL_GETMASTRCFG;

From e1d9c085db80b02fddf8f3e14baf48c55826cc32 Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Sun, 13 Mar 2022 13:25:20 +0100
Subject: [PATCH 219/244] Drop class template argument from constructor

---
 llvm/unittests/IR/PassBuilderCallbacksTest.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp
index edd46b8521d6..e771a991be32 100644
--- a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp
+++ b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp
@@ -239,7 +239,7 @@ struct MockAnalysisHandle<Loop>
   MOCK_METHOD3_T(invalidate, bool(Loop &, const PreservedAnalyses &,
                                   LoopAnalysisManager::Invalidator &));
 
-  MockAnalysisHandle<Loop>() { this->setDefaults(); }
+  MockAnalysisHandle() { this->setDefaults(); }
 };
 
 template <>
@@ -250,7 +250,7 @@ struct MockAnalysisHandle<Function>
   MOCK_METHOD3(invalidate, bool(Function &, const PreservedAnalyses &,
                                 FunctionAnalysisManager::Invalidator &));
 
-  MockAnalysisHandle<Function>() { setDefaults(); }
+  MockAnalysisHandle() { setDefaults(); }
 };
 
 template <>
@@ -264,7 +264,7 @@ struct MockAnalysisHandle<LazyCallGraph::SCC>
   MOCK_METHOD3(invalidate, bool(LazyCallGraph::SCC &, const PreservedAnalyses &,
                                 CGSCCAnalysisManager::Invalidator &));
 
-  MockAnalysisHandle<LazyCallGraph::SCC>() { setDefaults(); }
+  MockAnalysisHandle() { setDefaults(); }
 };
 
 template <>
@@ -275,7 +275,7 @@ struct MockAnalysisHandle<Module>
   MOCK_METHOD3(invalidate, bool(Module &, const PreservedAnalyses &,
                                 ModuleAnalysisManager::Invalidator &));
 
-  MockAnalysisHandle<Module>() { setDefaults(); }
+  MockAnalysisHandle() { setDefaults(); }
 };
 
 static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {

From 0597b8edc32b37f76ffc01adfb93a661da60ec5e Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Sun, 13 Mar 2022 13:27:51 +0100
Subject: [PATCH 220/244] ld.bfd test: fix linking flags

---
 compiler-rt/test/asan/TestCases/Linux/global-overflow-bfd.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/asan/TestCases/Linux/global-overflow-bfd.cpp b/compiler-rt/test/asan/TestCases/Linux/global-overflow-bfd.cpp
index 117a761af91f..1c29dba3a3a1 100644
--- a/compiler-rt/test/asan/TestCases/Linux/global-overflow-bfd.cpp
+++ b/compiler-rt/test/asan/TestCases/Linux/global-overflow-bfd.cpp
@@ -1,6 +1,6 @@
 // Test that gc-sections-friendly instrumentation of globals does not introduce
 // false negatives with the BFD linker.
-// RUN: %clangxx_asan -fuse-ld=bfd -Wl,-gc-sections -ffunction-sections -fdata-sections -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -fuse-ld=bfd -ldl -lrt -lpthread -Wl,-gc-sections -ffunction-sections -fdata-sections -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
 #include <string.h>
 int main(int argc, char **argv) {

From 754704f1ba993f0892ac30b9016822a9ff32c0c4 Mon Sep 17 00:00:00 2001
From: Andrea Gussoni <andrealinux1@gmail.com>
Date: Thu, 29 Nov 2018 14:40:28 +0100
Subject: [PATCH 221/244] Enable llvm documentation

---
 llvm/docs/doxygen.cfg.in | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/docs/doxygen.cfg.in b/llvm/docs/doxygen.cfg.in
index 7a6d531ad255..d799ed861606 100644
--- a/llvm/docs/doxygen.cfg.in
+++ b/llvm/docs/doxygen.cfg.in
@@ -1127,7 +1127,7 @@ HTML_TIMESTAMP         = YES
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_DYNAMIC_SECTIONS  = NO
+HTML_DYNAMIC_SECTIONS  = YES
 
 # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
 # shown in the various tree structured indices initially; the user can expand
@@ -1154,7 +1154,7 @@ HTML_INDEX_NUM_ENTRIES = 100
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-GENERATE_DOCSET        = NO
+GENERATE_DOCSET        = YES
 
 # This tag determines the name of the docset feed. A documentation feed provides
 # an umbrella under which multiple documentation sets from a single provider
@@ -1170,7 +1170,7 @@ DOCSET_FEEDNAME        = "Doxygen generated docs"
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_DOCSET is set to YES.
 
-DOCSET_BUNDLE_ID       = org.doxygen.Project
+DOCSET_BUNDLE_ID       = llvm
 
 # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
 # the documentation publisher. This should be a reverse domain-name style
@@ -1178,7 +1178,7 @@ DOCSET_BUNDLE_ID       = org.doxygen.Project
 # The default value is: org.doxygen.Publisher.
 # This tag requires that the tag GENERATE_DOCSET is set to YES.
 
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+DOCSET_PUBLISHER_ID    = Publisher
 
 # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
 # The default value is: Publisher.
@@ -1268,7 +1268,7 @@ QCH_FILE               = @llvm_doxygen_qch_filename@
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
-QHP_NAMESPACE          = @llvm_doxygen_qhp_namespace@
+QHP_NAMESPACE          = llvm
 
 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
@@ -1338,7 +1338,7 @@ ECLIPSE_DOC_ID         = org.doxygen.Project
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-DISABLE_INDEX          = NO
+DISABLE_INDEX          = YES
 
 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
 # structure should be generated to display hierarchical information. If the tag
@@ -1469,7 +1469,7 @@ MATHJAX_CODEFILE       =
 # The default value is: YES.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-SEARCHENGINE           = @enable_searchengine@
+SEARCHENGINE           = NO
 
 # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
 # implemented using a web server instead of a web client using Javascript. There
@@ -2091,7 +2091,7 @@ CLASS_GRAPH            = YES
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-COLLABORATION_GRAPH    = YES
+COLLABORATION_GRAPH    = NO
 
 # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
 # groups, showing the direct groups dependencies.

From 5d721a380102b5df1692e1ad0697a7cddcfd3882 Mon Sep 17 00:00:00 2001
From: Andrea Gussoni <andrealinux1@gmail.com>
Date: Sun, 28 Apr 2019 12:54:37 +0000
Subject: [PATCH 222/244] Enable clang documentation

---
 clang/docs/doxygen.cfg.in | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/docs/doxygen.cfg.in b/clang/docs/doxygen.cfg.in
index 449552d99d46..57ebed8c5a12 100644
--- a/clang/docs/doxygen.cfg.in
+++ b/clang/docs/doxygen.cfg.in
@@ -1126,7 +1126,7 @@ HTML_TIMESTAMP         = YES
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_DYNAMIC_SECTIONS  = NO
+HTML_DYNAMIC_SECTIONS  = YES
 
 # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
 # shown in the various tree structured indices initially; the user can expand
@@ -1153,7 +1153,7 @@ HTML_INDEX_NUM_ENTRIES = 100
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-GENERATE_DOCSET        = NO
+GENERATE_DOCSET        = YES
 
 # This tag determines the name of the docset feed. A documentation feed provides
 # an umbrella under which multiple documentation sets from a single provider
@@ -1169,7 +1169,7 @@ DOCSET_FEEDNAME        = "Doxygen generated docs"
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_DOCSET is set to YES.
 
-DOCSET_BUNDLE_ID       = org.doxygen.Project
+DOCSET_BUNDLE_ID       = clang
 
 # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
 # the documentation publisher. This should be a reverse domain-name style
@@ -1177,7 +1177,7 @@ DOCSET_BUNDLE_ID       = org.doxygen.Project
 # The default value is: org.doxygen.Publisher.
 # This tag requires that the tag GENERATE_DOCSET is set to YES.
 
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+DOCSET_PUBLISHER_ID    = Publisher
 
 # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
 # The default value is: Publisher.
@@ -1252,7 +1252,7 @@ TOC_EXPAND             = NO
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-GENERATE_QHP           = @clang_doxygen_generate_qhp@
+GENERATE_QHP           = YES
 
 # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
 # the file name of the resulting .qch file. The path specified is relative to
@@ -1267,7 +1267,7 @@ QCH_FILE               = @clang_doxygen_qch_filename@
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
-QHP_NAMESPACE          = @clang_doxygen_qhp_namespace@
+QHP_NAMESPACE          = clang
 
 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
@@ -1337,7 +1337,7 @@ ECLIPSE_DOC_ID         = org.doxygen.Project
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-DISABLE_INDEX          = NO
+DISABLE_INDEX          = YES
 
 # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
 # structure should be generated to display hierarchical information. If the tag
@@ -1468,7 +1468,7 @@ MATHJAX_CODEFILE       =
 # The default value is: YES.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-SEARCHENGINE           = @enable_searchengine@
+SEARCHENGINE           = NO
 
 # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
 # implemented using a web server instead of a web client using Javascript. There
@@ -2090,7 +2090,7 @@ CLASS_GRAPH            = YES
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-COLLABORATION_GRAPH    = YES
+COLLABORATION_GRAPH    = NO
 
 # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
 # groups, showing the direct groups dependencies.

From 7bff0fe67d6d60d522314aaa1d6ce3859a9aca86 Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Tue, 23 Jun 2020 17:19:19 +0200
Subject: [PATCH 223/244] Triple.cpp: fix getArchTypeForLLVMName for "x86_64"

---
 llvm/lib/Support/Triple.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 4f483c965282..b4c47bf7dd2d 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -307,6 +307,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("thumbeb", thumbeb)
     .Case("x86", x86)
     .Case("x86-64", x86_64)
+    .Case("x86_64", x86_64)
     .Case("xcore", xcore)
     .Case("nvptx", nvptx)
     .Case("nvptx64", nvptx64)

From 6b6a37b8845dc72c4ec23c0e77105098bd44cb29 Mon Sep 17 00:00:00 2001
From: Alain Carlucci <alain@rev.ng>
Date: Thu, 15 Oct 2020 16:04:32 +0200
Subject: [PATCH 224/244] Triple.cpp: fix getArchTypeForLLVMName for "s390x"

---
 llvm/lib/Support/Triple.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index b4c47bf7dd2d..c6009c326892 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -301,6 +301,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("sparcel", sparcel)
     .Case("sparcv9", sparcv9)
     .Case("systemz", systemz)
+    .Case("s390x", systemz)
     .Case("tce", tce)
     .Case("tcele", tcele)
     .Case("thumb", thumb)

From c9f9e529a03543af177034c53edcea94f4bf3661 Mon Sep 17 00:00:00 2001
From: Alain Carlucci <alain@rev.ng>
Date: Thu, 15 Oct 2020 17:10:55 +0200
Subject: [PATCH 225/244] Triple.cpp: fix getArchTypeForLLVMName for "i386"

---
 llvm/lib/Support/Triple.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index c6009c326892..496a4db5a5c3 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -307,6 +307,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("thumb", thumb)
     .Case("thumbeb", thumbeb)
     .Case("x86", x86)
+    .Case("i386", x86)
     .Case("x86-64", x86_64)
     .Case("x86_64", x86_64)
     .Case("xcore", xcore)

From a6e887712d1cb8a6c8f57a6f60c9e73cf6028214 Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietrofezzardi@gmail.com>
Date: Tue, 5 Mar 2019 10:27:10 +0100
Subject: [PATCH 226/244] Fix memory leak in `CreateSigAltStack`

---
 llvm/lib/Support/Unix/Signals.inc | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 3d7b5d2fe5aa..71cc5c15ed71 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -254,8 +254,8 @@ static struct {
 // reported as a leak. We don't make any attempt to remove our alt signal
 // stack if we remove our signal handlers; that can't be done reliably if
 // someone else is also trying to do the same thing.
-static stack_t OldAltStack;
-static void* NewAltStackPointer;
+thread_local static stack_t OldAltStack;
+thread_local static void* NewAltStackPointer;
 
 static void CreateSigAltStack() {
   const size_t AltStackSize = MINSIGSTKSZ + 64 * 1024;
@@ -271,10 +271,13 @@ static void CreateSigAltStack() {
 
   stack_t AltStack = {};
   AltStack.ss_sp = static_cast<char *>(safe_malloc(AltStackSize));
-  NewAltStackPointer = AltStack.ss_sp; // Save to avoid reporting a leak.
   AltStack.ss_size = AltStackSize;
-  if (sigaltstack(&AltStack, &OldAltStack) != 0)
+  if (sigaltstack(&AltStack, &OldAltStack) != 0) {
     free(AltStack.ss_sp);
+  } else {
+    free(NewAltStackPointer);
+    NewAltStackPointer = AltStack.ss_sp;
+  }
 }
 #else
 static void CreateSigAltStack() {}

From 24093a304cabdb84c6de5a1a17832a41679ebaac Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Tue, 21 May 2019 23:13:13 +0200
Subject: [PATCH 227/244] MachO: improve relocation information

---
 llvm/include/llvm/BinaryFormat/MachO.h |  2 ++
 llvm/include/llvm/Object/MachO.h       |  6 ++++--
 llvm/lib/Object/MachOObjectFile.cpp    | 18 ++++++++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index f5d5ec328b5e..27fb6cb55348 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -227,6 +227,7 @@ enum DataRegionType {
 };
 
 enum RebaseType {
+  REBASE_TYPE_INVALID = 0u,
   REBASE_TYPE_POINTER = 1u,
   REBASE_TYPE_TEXT_ABSOLUTE32 = 2u,
   REBASE_TYPE_TEXT_PCREL32 = 3u
@@ -247,6 +248,7 @@ enum RebaseOpcode {
 };
 
 enum BindType {
+  BIND_TYPE_INVALID = 0u,
   BIND_TYPE_POINTER = 1u,
   BIND_TYPE_TEXT_ABSOLUTE32 = 2u,
   BIND_TYPE_TEXT_PCREL32 = 3u
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index 7eb017397846..f03325b783dc 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -172,6 +172,7 @@ class MachORebaseEntry {
   int32_t segmentIndex() const;
   uint64_t segmentOffset() const;
   StringRef typeName() const;
+  MachO::RebaseType type() const;
   StringRef segmentName() const;
   StringRef sectionName() const;
   uint64_t address() const;
@@ -195,7 +196,7 @@ class MachORebaseEntry {
   int32_t SegmentIndex = -1;
   uint64_t RemainingLoopCount = 0;
   uint64_t AdvanceAmount = 0;
-  uint8_t  RebaseType = 0;
+  MachO::RebaseType  RebaseType = MachO::REBASE_TYPE_INVALID;
   uint8_t  PointerSize;
   bool     Done = false;
 };
@@ -218,6 +219,7 @@ class MachOBindEntry {
   int32_t segmentIndex() const;
   uint64_t segmentOffset() const;
   StringRef typeName() const;
+  MachO::BindType type() const;
   StringRef symbolName() const;
   uint32_t flags() const;
   int64_t addend() const;
@@ -252,7 +254,7 @@ class MachOBindEntry {
   int64_t  Addend = 0;
   uint64_t RemainingLoopCount = 0;
   uint64_t AdvanceAmount = 0;
-  uint8_t  BindType = 0;
+  MachO::BindType BindType = MachO::BIND_TYPE_INVALID;
   uint8_t  PointerSize;
   Kind     TableKind;
   bool     Done = false;
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 302255926289..c0d10403fc7c 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -3182,8 +3182,9 @@ void MachORebaseEntry::moveNext() {
       DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n");
       break;
     case MachO::REBASE_OPCODE_SET_TYPE_IMM:
-      RebaseType = ImmValue;
-      if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) {
+      RebaseType = static_cast<MachO::RebaseType>(ImmValue);
+      if (RebaseType <= MachO::REBASE_TYPE_INVALID
+          || RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) {
         *E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " +
                             Twine((int)RebaseType) + " for opcode at: 0x" +
                             Twine::utohexstr(OpcodeStart - Opcodes.begin()));
@@ -3412,8 +3413,12 @@ int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; }
 
 uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; }
 
+MachO::RebaseType MachORebaseEntry::type() const { return RebaseType; }
+
 StringRef MachORebaseEntry::typeName() const {
   switch (RebaseType) {
+  case MachO::REBASE_TYPE_INVALID:
+    return "invalid";
   case MachO::REBASE_TYPE_POINTER:
     return "pointer";
   case MachO::REBASE_TYPE_TEXT_ABSOLUTE32:
@@ -3642,8 +3647,9 @@ void MachOBindEntry::moveNext() {
       }
       break;
     case MachO::BIND_OPCODE_SET_TYPE_IMM:
-      BindType = ImmValue;
-      if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) {
+      BindType = static_cast<MachO::BindType>(ImmValue);
+      if (ImmValue <= MachO::BIND_TYPE_INVALID
+          || ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) {
         *E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " +
                             Twine((int)ImmValue) + " for opcode at: 0x" +
                             Twine::utohexstr(OpcodeStart - Opcodes.begin()));
@@ -3956,8 +3962,12 @@ int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; }
 
 uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; }
 
+MachO::BindType MachOBindEntry::type() const { return BindType; }
+
 StringRef MachOBindEntry::typeName() const {
   switch (BindType) {
+  case MachO::BIND_TYPE_INVALID:
+    return "invalid";
   case MachO::BIND_TYPE_POINTER:
     return "pointer";
   case MachO::BIND_TYPE_TEXT_ABSOLUTE32:

From 968b1c6af201438808a7ef29d241b97f97188d01 Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Tue, 21 May 2019 20:53:14 +0200
Subject: [PATCH 228/244] LazyValueInfo: handle Intrinsic::ctlz

---
 llvm/lib/Analysis/LazyValueInfo.cpp | 102 ++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index ba2b6fe94c18..7550ed854079 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -613,6 +613,108 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueImpl(
     if (auto *CI = dyn_cast<CastInst>(BBI))
       return solveBlockValueCast(CI, BB);
 
+    if (auto *IC = dyn_cast<CallInst>(BBI)) {
+      if (auto *F = IC->getCalledFunction()) {
+        if (F->getIntrinsicID() == Intrinsic::ctlz) {
+          // Derive the lattice value of llvm.ctlz from that of its operand.
+          Value *Argument = IC->getArgOperand(0);
+          using VLE = ValueLatticeElement;
+          Optional<VLE> BlockVal = getBlockValue(Argument, BB);
+          if (!BlockVal.hasValue())
+            return BlockVal;
+
+          VLE V = BlockVal.getValue();
+          const unsigned OperandBitWidth = DL.getTypeSizeInBits(BBI->getType());
+          auto GetAPInt = [OperandBitWidth] (uint64_t V) {
+            return APInt(OperandBitWidth, V);
+          };
+          // getNonEmpty: on i1 the bound BitWidth + 1 truncates to 0; the
+          // resulting [0, 0) must mean the full set, not the empty one.
+          auto GetRange = [&GetAPInt] (uint64_t Lower, uint64_t Upper) {
+            return VLE::getRange(
+                ConstantRange::getNonEmpty(GetAPInt(Lower), GetAPInt(Upper)));
+          };
+
+          bool ZeroIsUndef = cast<ConstantInt>(IC->getArgOperand(1))->isOne();
+          Constant *C = nullptr;
+          if (V.isConstant())
+            C = V.getConstant();
+          else if (V.isNotConstant())
+            C = V.getNotConstant();
+          ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
+          const APInt *NV = CI != nullptr ? &CI->getValue() : nullptr;
+
+          Optional<ValueLatticeElement> Res = None;
+
+          if (V.isUnknownOrUndef()) {
+            // No valid values
+            Res = V;
+          } else if (V.isOverdefined()) {
+            if (ZeroIsUndef) {
+              // It might be zero, the result is overdefined
+              Res = VLE::getOverdefined();
+            } else {
+              // From 0 to the bit width
+              Res = GetRange(0, OperandBitWidth + 1);
+            }
+          } else if (V.isConstant()) {
+            if (ZeroIsUndef && (CI == nullptr || CI->isZero())) {
+              // If we have an explicit zero (or we can't tell), the result is undefined
+              Res = VLE::getOverdefined();
+            } else if (NV != nullptr) {
+              // Zero is safe, and we have the constant, return the exact result
+              Res = VLE::get(ConstantInt::get(IC->getType(), NV->countLeadingZeros()));
+            } else {
+              // Zero is safe but the constant is not known, get the range of bits
+              Res = GetRange(0, OperandBitWidth + 1);
+            }
+          } else if (V.isNotConstant()) {
+            if (CI != nullptr && CI->isZero()) {
+              // We can explicitly exclude zero, valid results are from 0 to bit
+              // width minus one
+              Res = GetRange(0, OperandBitWidth);
+            } else if (ZeroIsUndef) {
+              // Zero is not safe, and we can't explicitly exclude it
+              Res = VLE::getOverdefined();
+            } else {
+              // Zero is safe, but we can't say much. We could say "not one" but
+              // we cannot express the disjoint range
+              Res = GetRange(0, OperandBitWidth + 1);
+            }
+          } else if (V.isConstantRange()) {
+            const ConstantRange &Range = V.getConstantRange();
+            if (ZeroIsUndef && Range.contains(GetAPInt(0))) {
+              // Zero is not safe and it's not excluded by the range
+              Res = VLE::getOverdefined();
+            } else if (Range.isWrappedSet() || Range.isFullSet()) {
+              // The range wraps, therefore it includes the two extreme
+              // encodings, all zeros and all ones. The only way we can express
+              // this is [0, BitWidth + 1)
+              Res = GetRange(0, OperandBitWidth + 1);
+            } else {
+              // Zero is either safe or not in the range. The output range is
+              // composed by the result of countLeadingZero of the two extremes,
+              // sorted.
+              APInt Lower = GetAPInt(Range.getLower().countLeadingZeros());
+              APInt Last = Range.getUpper() - 1;
+              APInt Upper = GetAPInt(Last.countLeadingZeros());
+
+              if (Lower.eq(Upper)) {
+                Res = VLE::get(ConstantInt::get(IC->getType(), Lower));
+              } else {
+                if (Lower.ugt(Upper))
+                  std::swap(Lower, Upper);
+                ++Upper;
+                Res = VLE::getRange(ConstantRange(Lower, Upper));
+              }
+            }
+          }
+
+          return Res;
+        }
+      }
+    }
+
     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
       return solveBlockValueBinaryOp(BO, BB);
 

From 3e9e7484ee154c34653f52e2fe15c3edef1458c6 Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Wed, 22 May 2019 11:43:33 +0200
Subject: [PATCH 229/244] LazyValueInfo: handle x & 0xfff0 == 0x1230

This commit improves the handling of comparisons of a masked value against
a constant. For example, for a 16-bit integer, we might have
(x & 0xfff0) == 0x1230. This is a typical pattern produced by InstCombine
to efficiently check whether x is between 0x1230 (included) and 0x1240
(excluded).
---
 llvm/lib/Analysis/LazyValueInfo.cpp | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 7550ed854079..f960d4700325 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1230,13 +1230,38 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
   CmpInst::Predicate EdgePred =
       isTrueDest ? ICI->getPredicate() : ICI->getInversePredicate();
 
-  if (isa<Constant>(RHS)) {
-    if (ICI->isEquality() && LHS == Val) {
+  if (isa<Constant>(RHS) && ICI->isEquality()) {
+    if (LHS == Val) {
+      // We know that V has the RHS constant if the edge predicate is equality.
       if (EdgePred == ICmpInst::ICMP_EQ)
         return ValueLatticeElement::get(cast<Constant>(RHS));
       else if (!isa<UndefValue>(RHS))
         return ValueLatticeElement::getNot(cast<Constant>(RHS));
     }
+
+    // Handle `(Val & Mask) ==/!= C` when ~Mask is a low-bits mask: Val lies
+    // inside (EQ) or outside (NE) the aligned range [C, C + ~Mask + 1).
+    if (ConstantInt *CIRHS = dyn_cast<ConstantInt>(RHS)) {
+      ConstantInt *Mask;
+      if (match(LHS, m_And(m_Specific(Val), m_ConstantInt(Mask)))) {
+        const APInt &FixedBits = CIRHS->getValue();
+        APInt MaskValue = ~Mask->getValue();
+        // Skip Mask == 0: MaskValue + 1 wraps and [C, C) would be empty.
+        if (MaskValue.isMask() && !MaskValue.isAllOnesValue()) {
+          using VLE = ValueLatticeElement;
+          const bool IsEq = EdgePred == ICmpInst::ICMP_EQ;
+          // C has bits outside Mask: the equality can never hold.
+          if ((MaskValue & FixedBits) != 0)
+            return IsEq ? VLE() : VLE::getOverdefined();
+
+          // High bits are pinned to C, the low ~Mask bits are free.
+          const APInt End = FixedBits + MaskValue + 1;
+          if (IsEq)
+            return VLE::getRange({FixedBits, End});
+          return VLE::getRange({End, FixedBits});
+        }
+      }
+    }
   }
 
   if (!Val->getType()->isIntegerTy())

From 654997962654658dfa5a7bedcf126463268c3689 Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Wed, 2 Sep 2020 16:40:52 +0200
Subject: [PATCH 230/244] Drop git revision from --version

---
 llvm/cmake/modules/VersionFromVCS.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
index 18edbeabe3e4..23e80a91503d 100644
--- a/llvm/cmake/modules/VersionFromVCS.cmake
+++ b/llvm/cmake/modules/VersionFromVCS.cmake
@@ -5,7 +5,7 @@
 
 function(get_source_info path revision repository)
   find_package(Git QUIET)
-  if(GIT_FOUND)
+  if(FALSE)
     execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --git-dir
       WORKING_DIRECTORY ${path}
       RESULT_VARIABLE git_result

From 6085e9c024236f6c25b0f560316d9d501cbf0e12 Mon Sep 17 00:00:00 2001
From: Alain Carlucci <alain@rev.ng>
Date: Thu, 26 Nov 2020 15:47:15 +0100
Subject: [PATCH 231/244] InstPrinter: Add AArch64 register markup

---
 .../MCTargetDesc/AArch64InstPrinter.cpp       | 143 +++++++++++-------
 .../AArch64/MCTargetDesc/AArch64InstPrinter.h |   1 +
 2 files changed, 93 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 340120d2b9e8..261df72d4a08 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -53,7 +53,12 @@ AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
 
 void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
   // This is for .cfi directives.
-  OS << getRegisterName(RegNo);
+  OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
+}
+
+void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo,
+                                      unsigned AltIdx) const {
+  OS << markup("<reg:") << getRegisterName(RegNo, AltIdx) << markup(">");
 }
 
 void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -105,8 +110,10 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       }
 
       if (AsmMnemonic) {
-        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
-          << ", " << getRegisterName(getWRegFromXReg(Op1.getReg()));
+        O << '\t' << AsmMnemonic << '\t';
+        printRegName(O, Op0.getReg());
+        O << ", ";
+        printRegName(O, getWRegFromXReg(Op1.getReg()));
         printAnnotation(O, Annot);
         return;
       }
@@ -141,8 +148,11 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
         shift = immr;
       }
       if (AsmMnemonic) {
-        O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
-          << ", " << getRegisterName(Op1.getReg()) << ", #" << shift;
+        O << '\t' << AsmMnemonic << '\t';
+        printRegName(O, Op0.getReg());
+        O << ", ";
+        printRegName(O, Op1.getReg());
+        O << ", #" << shift;
         printAnnotation(O, Annot);
         return;
       }
@@ -150,17 +160,22 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
 
     // SBFIZ/UBFIZ aliases
     if (Op2.getImm() > Op3.getImm()) {
-      O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t'
-        << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
-        << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1;
+      O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t';
+      printRegName(O, Op0.getReg());
+      O << ", ";
+      printRegName(O, Op1.getReg());
+      O << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #"
+        << Op3.getImm() + 1;
       printAnnotation(O, Annot);
       return;
     }
 
     // Otherwise SBFX/UBFX is the preferred form
-    O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t'
-      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
-      << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
+    O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t';
+    printRegName(O, Op0.getReg());
+    O << ", ";
+    printRegName(O, Op1.getReg());
+    O << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
     printAnnotation(O, Annot);
     return;
   }
@@ -179,8 +194,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       int LSB = (BitWidth - ImmR) % BitWidth;
       int Width = ImmS + 1;
 
-      O << "\tbfc\t" << getRegisterName(Op0.getReg())
-        << ", #" << LSB << ", #" << Width;
+      O << "\tbfc\t";
+      printRegName(O, Op0.getReg());
+      O << ", #" << LSB << ", #" << Width;
       printAnnotation(O, Annot);
       return;
     } else if (ImmS < ImmR) {
@@ -189,8 +205,11 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       int LSB = (BitWidth - ImmR) % BitWidth;
       int Width = ImmS + 1;
 
-      O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", "
-        << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width;
+      O << "\tbfi\t";
+      printRegName(O, Op0.getReg());
+      O << ", ";
+      printRegName(O, Op2.getReg());
+      O << ", #" << LSB << ", #" << Width;
       printAnnotation(O, Annot);
       return;
     }
@@ -198,9 +217,11 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
     int LSB = ImmR;
     int Width = ImmS - ImmR + 1;
     // Otherwise BFXIL the preferred form
-    O << "\tbfxil\t"
-      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg())
-      << ", #" << LSB << ", #" << Width;
+    O << "\tbfxil\t";
+    printRegName(O, Op0.getReg());
+    O << ", ";
+    printRegName(O, Op2.getReg());
+    O << ", #" << LSB << ", #" << Width;
     printAnnotation(O, Annot);
     return;
   }
@@ -216,14 +237,17 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
     else
       O << "\tmovn\t";
 
-    O << getRegisterName(MI->getOperand(0).getReg()) << ", #";
+    printRegName(O, MI->getOperand(0).getReg());
+    O << ", #";
     MI->getOperand(1).getExpr()->print(O, &MAI);
     return;
   }
 
   if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) &&
       MI->getOperand(2).isExpr()) {
-    O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #";
+    O << "\tmovk\t";
+    printRegName(O, MI->getOperand(0).getReg());
+    O << ", #";
     MI->getOperand(2).getExpr()->print(O, &MAI);
     return;
   }
@@ -241,8 +265,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
 
     if (AArch64_AM::isMOVZMovAlias(Value, Shift,
                                    Opcode == AArch64::MOVZXi ? 64 : 32)) {
-      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
-        << formatImm(SignExtend64(Value, RegWidth));
+      O << "\tmov\t";
+      printRegName(O, MI->getOperand(0).getReg());
+      O << ", #" << formatImm(SignExtend64(Value, RegWidth));
       return;
     }
   }
@@ -256,8 +281,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       Value = Value & 0xffffffff;
 
     if (AArch64_AM::isMOVNMovAlias(Value, Shift, RegWidth)) {
-      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
-        << formatImm(SignExtend64(Value, RegWidth));
+      O << "\tmov\t";
+      printRegName(O, MI->getOperand(0).getReg());
+      O << ", #" << formatImm(SignExtend64(Value, RegWidth));
       return;
     }
   }
@@ -270,8 +296,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
     uint64_t Value = AArch64_AM::decodeLogicalImmediate(
         MI->getOperand(2).getImm(), RegWidth);
     if (!AArch64_AM::isAnyMOVWMovAlias(Value, RegWidth)) {
-      O << "\tmov\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
-        << formatImm(SignExtend64(Value, RegWidth));
+      O << "\tmov\t";
+      printRegName(O, MI->getOperand(0).getReg());
+      O << ", #" << formatImm(SignExtend64(Value, RegWidth));
       return;
     }
   }
@@ -714,14 +741,15 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, uint64_t Address,
 
   bool IsTbx;
   if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) {
-    O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t'
-      << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", ";
+    O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t';
+    printRegName(O, MI->getOperand(0).getReg(), AArch64::vreg);
+    O << ", ";
 
     unsigned ListOpNum = IsTbx ? 2 : 1;
     printVectorList(MI, ListOpNum, STI, O, "");
 
-    O << ", "
-      << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
+    O << ", ";
+    printRegName(O, MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
     printAnnotation(O, Annot);
     return;
   }
@@ -739,14 +767,17 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, uint64_t Address,
 
     // Next the address: [xN]
     unsigned AddrReg = MI->getOperand(OpNum++).getReg();
-    O << ", [" << getRegisterName(AddrReg) << ']';
+    O << ", [";
+    printRegName(O, AddrReg);
+    O << ']';
 
     // Finally, there might be a post-indexed offset.
     if (LdStDesc->NaturalOffset != 0) {
       unsigned Reg = MI->getOperand(OpNum++).getReg();
-      if (Reg != AArch64::XZR)
-        O << ", " << getRegisterName(Reg);
-      else {
+      if (Reg != AArch64::XZR) {
+        O << ", ";
+        printRegName(O, Reg);
+      } else {
         assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?");
         O << ", #" << LdStDesc->NaturalOffset;
       }
@@ -866,8 +897,10 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
   std::transform(Str.begin(), Str.end(), Str.begin(), ::tolower);
 
   O << '\t' << Str;
-  if (NeedsReg)
-    O << ", " << getRegisterName(MI->getOperand(4).getReg());
+  if (NeedsReg) {
+    O << ", ";
+    printRegName(O, MI->getOperand(4).getReg());
+  }
 
   return true;
 }
@@ -878,7 +911,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isReg()) {
     unsigned Reg = Op.getReg();
-    O << getRegisterName(Reg);
+    printRegName(O, Reg);
   } else if (Op.isImm()) {
     printImm(MI, OpNo, STI, O);
   } else {
@@ -922,7 +955,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
     if (Reg == AArch64::XZR)
       O << "#" << Imm;
     else
-      O << getRegisterName(Reg);
+      printRegName(O, Reg);
   } else
     llvm_unreachable("unknown operand kind in printPostIncOperand64");
 }
@@ -933,7 +966,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
   const MCOperand &Op = MI->getOperand(OpNo);
   assert(Op.isReg() && "Non-register vreg operand!");
   unsigned Reg = Op.getReg();
-  O << getRegisterName(Reg, AArch64::vreg);
+  printRegName(O, Reg, AArch64::vreg);
 }
 
 void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
@@ -990,14 +1023,14 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
 void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
                                               const MCSubtargetInfo &STI,
                                               raw_ostream &O) {
-  O << getRegisterName(MI->getOperand(OpNum).getReg());
+  printRegName(O, MI->getOperand(OpNum).getReg());
   printShifter(MI, OpNum + 1, STI, O);
 }
 
 void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
                                                const MCSubtargetInfo &STI,
                                                raw_ostream &O) {
-  O << getRegisterName(MI->getOperand(OpNum).getReg());
+  printRegName(O, MI->getOperand(OpNum).getReg());
   printArithExtend(MI, OpNum + 1, STI, O);
 }
 
@@ -1085,7 +1118,9 @@ void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
 void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
-  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
+  O << '[';
+  printRegName(O, MI->getOperand(OpNum).getReg());
+  O << ']';
 }
 
 template<int Scale>
@@ -1109,7 +1144,8 @@ void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
 void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
                                           unsigned Scale, raw_ostream &O) {
   const MCOperand MO1 = MI->getOperand(OpNum + 1);
-  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg());
+  O << '[';
+  printRegName(O, MI->getOperand(OpNum).getReg());
   if (MO1.isImm()) {
       O << ", #" << formatImm(MO1.getImm() * Scale);
   } else {
@@ -1265,7 +1301,9 @@ void AArch64InstPrinter::printGPRSeqPairsClassOperand(const MCInst *MI,
 
   unsigned Even = MRI.getSubReg(Reg,  Sube);
   unsigned Odd = MRI.getSubReg(Reg,  Subo);
-  O << getRegisterName(Even) << ", " << getRegisterName(Odd);
+  printRegName(O, Even);
+  O << ", ";
+  printRegName(O, Odd);
 }
 
 void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
@@ -1309,10 +1347,13 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
   }
 
   for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
-    if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg))
-      O << getRegisterName(Reg) << LayoutSuffix;
-    else
-      O << getRegisterName(Reg, AArch64::vreg) << LayoutSuffix;
+    if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg)) {
+      printRegName(O, Reg);
+      O << LayoutSuffix;
+    } else {
+      printRegName(O, Reg, AArch64::vreg);
+      O << LayoutSuffix;
+    }
 
     if (i + 1 != NumRegs)
       O << ", ";
@@ -1543,7 +1584,7 @@ void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum,
   }
 
   unsigned Reg = MI->getOperand(OpNum).getReg();
-  O << getRegisterName(Reg);
+  printRegName(O, Reg);
   if (suffix != 0)
     O << '.' << suffix;
 }
@@ -1625,7 +1666,7 @@ void AArch64InstPrinter::printZPRasFPR(const MCInst *MI, unsigned OpNum,
     llvm_unreachable("Unsupported width");
   }
   unsigned Reg = MI->getOperand(OpNum).getReg();
-  O << getRegisterName(Reg - AArch64::Z0 + Base);
+  printRegName(O, Reg - AArch64::Z0 + Base);
 }
 
 template <unsigned ImmIs0, unsigned ImmIs1>
@@ -1642,7 +1683,7 @@ void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
   unsigned Reg = MI->getOperand(OpNum).getReg();
-  O << getRegisterName(getWRegFromXReg(Reg));
+  printRegName(O, getWRegFromXReg(Reg));
 }
 
 void AArch64InstPrinter::printGPR64x8(const MCInst *MI, unsigned OpNum,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 4be885e667d8..4b5d6dd56683 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -28,6 +28,7 @@ class AArch64InstPrinter : public MCInstPrinter {
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
   void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+  void printRegName(raw_ostream &OS, unsigned RegNo, unsigned AltIdx) const;
 
   // Autogenerated by tblgen.
   std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;

From 65ceb3e583d862ad9a73763471d9e4d80ec622e4 Mon Sep 17 00:00:00 2001
From: Alain Carlucci <alain@rev.ng>
Date: Thu, 26 Nov 2020 16:55:50 +0100
Subject: [PATCH 232/244] InstPrinter: Add AArch64 immediate markup

---
 .../MCTargetDesc/AArch64InstPrinter.cpp       | 118 +++++++++++-------
 1 file changed, 75 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 261df72d4a08..40dfbffa835b 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -152,7 +152,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
         printRegName(O, Op0.getReg());
         O << ", ";
         printRegName(O, Op1.getReg());
-        O << ", #" << shift;
+        O << ", " << markup("<imm:") << "#" << shift << markup(">");
         printAnnotation(O, Annot);
         return;
       }
@@ -164,8 +164,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       printRegName(O, Op0.getReg());
       O << ", ";
       printRegName(O, Op1.getReg());
-      O << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #"
-        << Op3.getImm() + 1;
+      O << ", " << markup("<imm:") << "#" << (Is64Bit ? 64 : 32) - Op2.getImm()
+        << markup(">") << ", " << markup("<imm:") << "#"
+        << Op3.getImm() + 1 << markup(">");
       printAnnotation(O, Annot);
       return;
     }
@@ -175,7 +176,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
     printRegName(O, Op0.getReg());
     O << ", ";
     printRegName(O, Op1.getReg());
-    O << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
+    O << ", " << markup("<imm:") << "#" << Op2.getImm() << markup(">")
+      << ", " << markup("<imm:") << "#" << Op3.getImm() - Op2.getImm() + 1
+      << markup(">");
     printAnnotation(O, Annot);
     return;
   }
@@ -196,7 +199,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
 
       O << "\tbfc\t";
       printRegName(O, Op0.getReg());
-      O << ", #" << LSB << ", #" << Width;
+      O << ", " << markup("<imm:") << "#" << LSB << markup(">")
+        << ", " << markup("<imm:") << "#" << Width << markup(">");
       printAnnotation(O, Annot);
       return;
     } else if (ImmS < ImmR) {
@@ -209,7 +213,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       printRegName(O, Op0.getReg());
       O << ", ";
       printRegName(O, Op2.getReg());
-      O << ", #" << LSB << ", #" << Width;
+      O << ", " << markup("<imm:") << "#" << LSB << markup(">")
+        << ", " << markup("<imm:") << "#" << Width << markup(">");
       printAnnotation(O, Annot);
       return;
     }
@@ -221,7 +226,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
     printRegName(O, Op0.getReg());
     O << ", ";
     printRegName(O, Op2.getReg());
-    O << ", #" << LSB << ", #" << Width;
+    O << ", " << markup("<imm:") << "#" << LSB << markup(">")
+      << ", " << markup("<imm:") << "#" << Width << markup(">");
     printAnnotation(O, Annot);
     return;
   }
@@ -238,8 +244,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       O << "\tmovn\t";
 
     printRegName(O, MI->getOperand(0).getReg());
-    O << ", #";
+    O << ", " << markup("<imm:") << "#";
     MI->getOperand(1).getExpr()->print(O, &MAI);
+    O << markup(">");
     return;
   }
 
@@ -247,8 +254,9 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
       MI->getOperand(2).isExpr()) {
     O << "\tmovk\t";
     printRegName(O, MI->getOperand(0).getReg());
-    O << ", #";
+    O << ", " << markup("<imm:") << "#";
     MI->getOperand(2).getExpr()->print(O, &MAI);
+    O << markup(">");
     return;
   }
 
@@ -267,7 +275,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
                                    Opcode == AArch64::MOVZXi ? 64 : 32)) {
       O << "\tmov\t";
       printRegName(O, MI->getOperand(0).getReg());
-      O << ", #" << formatImm(SignExtend64(Value, RegWidth));
+      O << ", " << markup("<imm:") << "#"
+        << formatImm(SignExtend64(Value, RegWidth)) << markup(">");
       return;
     }
   }
@@ -283,7 +292,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
     if (AArch64_AM::isMOVNMovAlias(Value, Shift, RegWidth)) {
       O << "\tmov\t";
       printRegName(O, MI->getOperand(0).getReg());
-      O << ", #" << formatImm(SignExtend64(Value, RegWidth));
+      O << ", " << markup("<imm:") << "#"
+        << formatImm(SignExtend64(Value, RegWidth)) << markup(">");
       return;
     }
   }
@@ -298,7 +308,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
     if (!AArch64_AM::isAnyMOVWMovAlias(Value, RegWidth)) {
       O << "\tmov\t";
       printRegName(O, MI->getOperand(0).getReg());
-      O << ", #" << formatImm(SignExtend64(Value, RegWidth));
+      O << ", " << markup("<imm:") << "#"
+        << formatImm(SignExtend64(Value, RegWidth)) << markup(">");
       return;
     }
   }
@@ -779,7 +790,8 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, uint64_t Address,
         printRegName(O, Reg);
       } else {
         assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?");
-        O << ", #" << LdStDesc->NaturalOffset;
+        O << ", " << markup("<imm:") << "#"
+          << LdStDesc->NaturalOffset << markup(">");
       }
     }
 
@@ -924,14 +936,14 @@ void AArch64InstPrinter::printImm(const MCInst *MI, unsigned OpNo,
                                      const MCSubtargetInfo &STI,
                                      raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
-  O << "#" << formatImm(Op.getImm());
+  O << markup("<imm:") << "#" << formatImm(Op.getImm()) << markup(">");
 }
 
 void AArch64InstPrinter::printImmHex(const MCInst *MI, unsigned OpNo,
                                      const MCSubtargetInfo &STI,
                                      raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
-  O << format("#%#llx", Op.getImm());
+  O << markup("<imm:") << format("#%#llx", Op.getImm()) << markup(">");
 }
 
 template<int Size>
@@ -953,7 +965,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
   if (Op.isReg()) {
     unsigned Reg = Op.getReg();
     if (Reg == AArch64::XZR)
-      O << "#" << Imm;
+      O << markup("<imm:") << "#" << Imm << markup(">");
     else
       printRegName(O, Reg);
   } else
@@ -986,7 +998,7 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
     assert(Val == MO.getImm() && "Add/sub immediate out of range!");
     unsigned Shift =
         AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
-    O << '#' << formatImm(Val);
+    O << markup("<imm:") << '#' << formatImm(Val) << markup(">");
     if (Shift != 0)
       printShifter(MI, OpNum + 1, STI, O);
 
@@ -1004,8 +1016,9 @@ void AArch64InstPrinter::printLogicalImm(const MCInst *MI, unsigned OpNum,
                                          const MCSubtargetInfo &STI,
                                          raw_ostream &O) {
   uint64_t Val = MI->getOperand(OpNum).getImm();
-  O << "#0x";
+  O << markup("<imm:") << "#0x";
   O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 8 * sizeof(T)));
+  O << markup(">");
 }
 
 void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
@@ -1017,7 +1030,8 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
       AArch64_AM::getShiftValue(Val) == 0)
     return;
   O << ", " << AArch64_AM::getShiftExtendName(AArch64_AM::getShiftType(Val))
-    << " #" << AArch64_AM::getShiftValue(Val);
+    << " " << markup("<imm:") << "#" << AArch64_AM::getShiftValue(Val)
+    << markup(">");
 }
 
 void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
@@ -1052,18 +1066,18 @@ void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
          ((Dest == AArch64::WSP || Src1 == AArch64::WSP) &&
           ExtType == AArch64_AM::UXTW) ) {
       if (ShiftVal != 0)
-        O << ", lsl #" << ShiftVal;
+        O << ", lsl " << markup("<imm:") << "#" << ShiftVal << markup(">");
       return;
     }
   }
   O << ", " << AArch64_AM::getShiftExtendName(ExtType);
   if (ShiftVal != 0)
-    O << " #" << ShiftVal;
+    O << " " << markup("<imm:") << "#" << ShiftVal << markup(">");
 }
 
 static void printMemExtendImpl(bool SignExtend, bool DoShift,
                                unsigned Width, char SrcRegKind,
-                               raw_ostream &O) {
+                               raw_ostream &O, bool UseMarkup) {
   // sxtw, sxtx, uxtw or lsl (== uxtx)
   bool IsLSL = !SignExtend && SrcRegKind == 'x';
   if (IsLSL)
@@ -1071,8 +1085,14 @@ static void printMemExtendImpl(bool SignExtend, bool DoShift,
   else
     O << (SignExtend ? 's' : 'u') << "xt" << SrcRegKind;
 
-  if (DoShift || IsLSL)
-    O << " #" << Log2_32(Width / 8);
+  if (DoShift || IsLSL) {
+    O << " ";
+    if (UseMarkup)
+      O << "<imm:";
+    O << "#" << Log2_32(Width / 8);
+    if (UseMarkup)
+      O << ">";
+  }
 }
 
 void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
@@ -1080,7 +1100,7 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
                                         unsigned Width) {
   bool SignExtend = MI->getOperand(OpNum).getImm();
   bool DoShift = MI->getOperand(OpNum + 1).getImm();
-  printMemExtendImpl(SignExtend, DoShift, Width, SrcRegKind, O);
+  printMemExtendImpl(SignExtend, DoShift, Width, SrcRegKind, O, UseMarkup);
 }
 
 template <bool SignExtend, int ExtWidth, char SrcRegKind, char Suffix>
@@ -1097,7 +1117,7 @@ void AArch64InstPrinter::printRegWithShiftExtend(const MCInst *MI,
   bool DoShift = ExtWidth != 8;
   if (SignExtend || DoShift || SrcRegKind == 'w') {
     O << ", ";
-    printMemExtendImpl(SignExtend, DoShift, ExtWidth, SrcRegKind, O);
+    printMemExtendImpl(SignExtend, DoShift, ExtWidth, SrcRegKind, O, UseMarkup);
   }
 }
 
@@ -1127,14 +1147,16 @@ template<int Scale>
 void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
                                        const MCSubtargetInfo &STI,
                                        raw_ostream &O) {
-  O << '#' << formatImm(Scale * MI->getOperand(OpNum).getImm());
+  O << markup("<imm:") << '#'
+    << formatImm(Scale * MI->getOperand(OpNum).getImm()) << markup(">");
 }
 
 void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
                                            unsigned Scale, raw_ostream &O) {
   const MCOperand MO = MI->getOperand(OpNum);
   if (MO.isImm()) {
-    O << "#" << formatImm(MO.getImm() * Scale);
+    O << markup("<imm:") << "#" << formatImm(MO.getImm() * Scale)
+      << markup(">");
   } else {
     assert(MO.isExpr() && "Unexpected operand type!");
     MO.getExpr()->print(O, &MAI);
@@ -1147,7 +1169,8 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
   O << '[';
   printRegName(O, MI->getOperand(OpNum).getReg());
   if (MO1.isImm()) {
-      O << ", #" << formatImm(MO1.getImm() * Scale);
+      O << ", " << markup("<imm:") << "#" << formatImm(MO1.getImm() * Scale)
+        << markup(">");
   } else {
     assert(MO1.isExpr() && "Unexpected operand type!");
     O << ", ";
@@ -1171,7 +1194,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
     return;
   }
 
-  O << '#' << formatImm(prfop);
+  O << markup("<imm:") << '#' << formatImm(prfop) << markup(">");
 }
 
 void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
@@ -1182,7 +1205,7 @@ void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
   if (PSB)
     O << PSB->Name;
   else
-    O << '#' << formatImm(psbhintop);
+    O << markup("<imm:") << '#' << formatImm(psbhintop) << markup(">");
 }
 
 void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
@@ -1193,7 +1216,7 @@ void AArch64InstPrinter::printBTIHintOp(const MCInst *MI, unsigned OpNum,
   if (BTI)
     O << BTI->Name;
   else
-    O << '#' << formatImm(btihintop);
+    O << markup("<imm:") << '#' << formatImm(btihintop) << markup(">");
 }
 
 void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
@@ -1204,7 +1227,7 @@ void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
       MO.isFPImm() ? MO.getFPImm() : AArch64_AM::getFPImmFloat(MO.getImm());
 
   // 8 decimal places are enough to perfectly represent permitted floats.
-  O << format("#%.8f", FPImm);
+  O << markup("<imm:") << format("#%.8f", FPImm) << markup(">");
 }
 
 static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
@@ -1398,11 +1421,17 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
   // If the label has already been resolved to an immediate offset (say, when
   // we're running the disassembler), just print the immediate.
   if (Op.isImm()) {
+
+    O << markup("<imm:");
+
     int64_t Offset = Op.getImm() * 4;
     if (PrintBranchImmAsAddress)
       O << formatHex(Address + Offset);
     else
       O << "#" << formatImm(Offset);
+
+    O << markup(">");
+
     return;
   }
 
@@ -1428,10 +1457,12 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address,
   // we're running the disassembler), just print the immediate.
   if (Op.isImm()) {
     const int64_t Offset = Op.getImm() * 4096;
+    O << markup("<imm:");
     if (PrintBranchImmAsAddress)
       O << formatHex((Address & -4096) + Offset);
     else
       O << "#" << Offset;
+    O << markup(">");
     return;
   }
 
@@ -1459,7 +1490,7 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
   if (!Name.empty())
     O << Name;
   else
-    O << "#" << Val;
+    O << markup("<imm:") << "#" << Val << markup(">");
 }
 
 void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
@@ -1539,7 +1570,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
   if (PState && PState->haveFeatures(STI.getFeatureBits()))
     O << PState->Name;
   else
-    O << "#" << formatImm(Val);
+    O << markup("<imm:") << "#" << formatImm(Val) << markup(">");
 }
 
 void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
@@ -1547,7 +1578,7 @@ void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
                                                 raw_ostream &O) {
   unsigned RawVal = MI->getOperand(OpNo).getImm();
   uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
-  O << format("#%#016llx", Val);
+  O << markup("<imm:") << format("#%#016llx", Val) << markup(">");
 }
 
 template<int64_t Angle, int64_t Remainder>
@@ -1555,7 +1586,7 @@ void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
                                                 const MCSubtargetInfo &STI,
                                                 raw_ostream &O) {
   unsigned Val = MI->getOperand(OpNo).getImm();
-  O << "#" << (Val * Angle) + Remainder;
+  O << markup("<imm:") << "#" << (Val * Angle) + Remainder << markup(">");
 }
 
 void AArch64InstPrinter::printSVEPattern(const MCInst *MI, unsigned OpNum,
@@ -1565,7 +1596,7 @@ void AArch64InstPrinter::printSVEPattern(const MCInst *MI, unsigned OpNum,
   if (auto Pat = AArch64SVEPredPattern::lookupSVEPREDPATByEncoding(Val))
     O << Pat->Name;
   else
-    O << '#' << formatImm(Val);
+    O << markup("<imm:") << '#' << formatImm(Val) << markup(">");
 }
 
 template <char suffix>
@@ -1594,9 +1625,9 @@ void AArch64InstPrinter::printImmSVE(T Value, raw_ostream &O) {
   std::make_unsigned_t<T> HexValue = Value;
 
   if (getPrintImmHex())
-    O << '#' << formatHex((uint64_t)HexValue);
+    O << markup("<imm:") << '#' << formatHex((uint64_t)HexValue) << markup(">");
   else
-    O << '#' << formatDec(Value);
+    O << markup("<imm:") << '#' << formatDec(Value) << markup(">");
 
   if (CommentStream) {
     // Do the opposite to that used for instruction operands.
@@ -1618,7 +1649,7 @@ void AArch64InstPrinter::printImm8OptLsl(const MCInst *MI, unsigned OpNum,
 
   // #0 lsl #8 is never pretty printed
   if ((UnscaledVal == 0) && (AArch64_AM::getShiftValue(Shift) != 0)) {
-    O << '#' << formatImm(UnscaledVal);
+    O << markup("<imm:") << '#' << formatImm(UnscaledVal) << markup(">");
     printShifter(MI, OpNum + 1, STI, O);
     return;
   }
@@ -1648,7 +1679,7 @@ void AArch64InstPrinter::printSVELogicalImm(const MCInst *MI, unsigned OpNum,
   else if ((uint16_t)PrintVal == PrintVal)
     printImmSVE(PrintVal, O);
   else
-    O << '#' << formatHex((uint64_t)PrintVal);
+    O << markup("<imm:") << '#' << formatHex((uint64_t)PrintVal) << markup(">");
 }
 
 template <int Width>
@@ -1676,7 +1707,8 @@ void AArch64InstPrinter::printExactFPImm(const MCInst *MI, unsigned OpNum,
   auto *Imm0Desc = AArch64ExactFPImm::lookupExactFPImmByEnum(ImmIs0);
   auto *Imm1Desc = AArch64ExactFPImm::lookupExactFPImmByEnum(ImmIs1);
   unsigned Val = MI->getOperand(OpNum).getImm();
-  O << "#" << (Val ? Imm1Desc->Repr : Imm0Desc->Repr);
+  O << markup("<imm:") << "#" << (Val ? Imm1Desc->Repr : Imm0Desc->Repr)
+    << markup(">");
 }
 
 void AArch64InstPrinter::printGPR64as32(const MCInst *MI, unsigned OpNum,

From 3b6cfc5d7e9f028333c422fdf95876f6dd9a11ce Mon Sep 17 00:00:00 2001
From: Alain Carlucci <alain@rev.ng>
Date: Thu, 26 Nov 2020 19:06:21 +0100
Subject: [PATCH 233/244] InstPrinter: Add SystemZ markup

---
 .../MCTargetDesc/SystemZInstPrinter.cpp       | 95 +++++++++++++------
 .../SystemZ/MCTargetDesc/SystemZInstPrinter.h |  4 +-
 llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp |  5 +-
 3 files changed, 70 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index fac363cae713..ca9f86ef04e3 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -24,32 +24,55 @@ using namespace llvm;
 #include "SystemZGenAsmWriter.inc"
 
 void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
-                                      unsigned Index, raw_ostream &O) {
+                                      unsigned Index, raw_ostream &O,
+                                      bool UseMarkup) {
+  if (UseMarkup)
+    O << "<imm:";
   O << Disp;
+  if (UseMarkup)
+    O << ">";
   if (Base || Index) {
     O << '(';
     if (Index) {
+      if (UseMarkup)
+        O << "<reg:";
       O << '%' << getRegisterName(Index);
+      if (UseMarkup)
+        O << ">";
       if (Base)
         O << ',';
     }
-    if (Base)
+    if (Base) {
+      if (UseMarkup)
+        O << "<reg:";
       O << '%' << getRegisterName(Base);
+      if (UseMarkup)
+        O << ">";
+    }
     O << ')';
   }
 }
 
 void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
-                                      raw_ostream &O) {
+                                      raw_ostream &O, bool UseMarkup) {
   if (MO.isReg()) {
     if (!MO.getReg())
       O << '0';
-    else
+    else {
+      if (UseMarkup)
+        O << "<reg:";
       O << '%' << getRegisterName(MO.getReg());
+      if (UseMarkup)
+        O << ">";
+    }
   }
-  else if (MO.isImm())
+  else if (MO.isImm()) {
+    if (UseMarkup)
+      O << "<imm:";
     O << MO.getImm();
-  else if (MO.isExpr())
+    if (UseMarkup)
+      O << ">";
+  } else if (MO.isExpr())
     MO.getExpr()->print(O, MAI);
   else
     llvm_unreachable("Invalid operand");
@@ -63,94 +86,105 @@ void SystemZInstPrinter::printInst(const MCInst *MI, uint64_t Address,
 }
 
 void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
-  O << '%' << getRegisterName(RegNo);
+  O << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
 }
 
 template <unsigned N>
-static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O,
+                             bool UseMarkup) {
   int64_t Value = MI->getOperand(OpNum).getImm();
   assert(isUInt<N>(Value) && "Invalid uimm argument");
+  if (UseMarkup)
+    O << "<imm:";
   O << Value;
+  if (UseMarkup)
+    O << ">";
 }
 
 template <unsigned N>
-static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O,
+                             bool UseMarkup) {
   int64_t Value = MI->getOperand(OpNum).getImm();
   assert(isInt<N>(Value) && "Invalid simm argument");
+  if (UseMarkup)
+    O << "<imm:";
   O << Value;
+  if (UseMarkup)
+    O << ">";
 }
 
 void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
-  printUImmOperand<1>(MI, OpNum, O);
+  printUImmOperand<1>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
-  printUImmOperand<2>(MI, OpNum, O);
+  printUImmOperand<2>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
-  printUImmOperand<3>(MI, OpNum, O);
+  printUImmOperand<3>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
-  printUImmOperand<4>(MI, OpNum, O);
+  printUImmOperand<4>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
-  printUImmOperand<6>(MI, OpNum, O);
+  printUImmOperand<6>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
-  printSImmOperand<8>(MI, OpNum, O);
+  printSImmOperand<8>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
-  printUImmOperand<8>(MI, OpNum, O);
+  printUImmOperand<8>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum,
                                             raw_ostream &O) {
-  printUImmOperand<12>(MI, OpNum, O);
+  printUImmOperand<12>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
                                             raw_ostream &O) {
-  printSImmOperand<16>(MI, OpNum, O);
+  printSImmOperand<16>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
                                             raw_ostream &O) {
-  printUImmOperand<16>(MI, OpNum, O);
+  printUImmOperand<16>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
                                             raw_ostream &O) {
-  printSImmOperand<32>(MI, OpNum, O);
+  printSImmOperand<32>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
                                             raw_ostream &O) {
-  printUImmOperand<32>(MI, OpNum, O);
+  printUImmOperand<32>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printU48ImmOperand(const MCInst *MI, int OpNum,
                                             raw_ostream &O) {
-  printUImmOperand<48>(MI, OpNum, O);
+  printUImmOperand<48>(MI, OpNum, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
                                            raw_ostream &O) {
   const MCOperand &MO = MI->getOperand(OpNum);
   if (MO.isImm()) {
-    O << "0x";
+    O << markup("<imm:") << "0x";
     O.write_hex(MO.getImm());
+    O << markup(">");
   } else
     MO.getExpr()->print(O, &MAI);
 }
@@ -181,20 +215,20 @@ void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI,
 
 void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
                                       raw_ostream &O) {
-  printOperand(MI->getOperand(OpNum), &MAI, O);
+  printOperand(MI->getOperand(OpNum), &MAI, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
                                             raw_ostream &O) {
   printAddress(MI->getOperand(OpNum).getReg(),
-               MI->getOperand(OpNum + 1).getImm(), 0, O);
+               MI->getOperand(OpNum + 1).getImm(), 0, O, UseMarkup);
 }
 
 void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
                                              raw_ostream &O) {
   printAddress(MI->getOperand(OpNum).getReg(),
                MI->getOperand(OpNum + 1).getImm(),
-               MI->getOperand(OpNum + 2).getReg(), O);
+               MI->getOperand(OpNum + 2).getReg(), O, UseMarkup);
 }
 
 void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
@@ -204,7 +238,7 @@ void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
   uint64_t Length = MI->getOperand(OpNum + 2).getImm();
   O << Disp << '(' << Length;
   if (Base)
-    O << ",%" << getRegisterName(Base);
+    O << "," << markup("<reg:") << "%" << getRegisterName(Base) << markup(">");
   O << ')';
 }
 
@@ -213,9 +247,10 @@ void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
   unsigned Base = MI->getOperand(OpNum).getReg();
   uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
   unsigned Length = MI->getOperand(OpNum + 2).getReg();
-  O << Disp << "(%" << getRegisterName(Length);
+  O << Disp << "(" << markup("<reg:") << "%" << getRegisterName(Length) 
+    << markup(">");
   if (Base)
-    O << ",%" << getRegisterName(Base);
+    O << "," << markup("<reg:") << "%" << getRegisterName(Base) << markup(">");
   O << ')';
 }
 
@@ -223,7 +258,7 @@ void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
                                              raw_ostream &O) {
   printAddress(MI->getOperand(OpNum).getReg(),
                MI->getOperand(OpNum + 1).getImm(),
-               MI->getOperand(OpNum + 2).getReg(), O);
+               MI->getOperand(OpNum + 2).getReg(), O, UseMarkup);
 }
 
 void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 0db7279a06c1..bdbc9b0ef513 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -33,11 +33,11 @@ class SystemZInstPrinter : public MCInstPrinter {
 
   // Print an address with the given base, displacement and index.
   static void printAddress(unsigned Base, int64_t Disp, unsigned Index,
-                           raw_ostream &O);
+                           raw_ostream &O, bool UseMarkup);
 
   // Print the given operand.
   static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
-                           raw_ostream &O);
+                           raw_ostream &O, bool UseMarkup);
 
   // Override MCInstPrinter.
   void printRegName(raw_ostream &O, unsigned RegNo) const override;
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 584737e1d940..f11bf417d54b 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -706,7 +706,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS);
   SystemZMCInstLower Lower(MF->getContext(), *this);
   MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
-  SystemZInstPrinter::printOperand(MO, MAI, OS);
+  SystemZInstPrinter::printOperand(MO, MAI, OS, false);
   return false;
 }
 
@@ -716,7 +716,8 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                               raw_ostream &OS) {
   SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(),
                                    MI->getOperand(OpNo + 1).getImm(),
-                                   MI->getOperand(OpNo + 2).getReg(), OS);
+                                   MI->getOperand(OpNo + 2).getReg(), OS,
+                                   false);
   return false;
 }
 

From 6011904fae81e713f19190eb471407834beff1c9 Mon Sep 17 00:00:00 2001
From: Alain Carlucci <alain@rev.ng>
Date: Fri, 27 Nov 2020 14:19:44 +0100
Subject: [PATCH 234/244] InstPrinter: Add MIPS markup

---
 llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
index 3700d6309e1a..3a045c92ee99 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -72,7 +72,8 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) {
 }
 
 void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << '$' << StringRef(getRegisterName(RegNo)).lower();
+  OS << markup("<reg:") << '$' << StringRef(getRegisterName(RegNo)).lower()
+     << markup(">");
 }
 
 void MipsInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -131,7 +132,7 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   }
 
   if (Op.isImm()) {
-    O << formatImm(Op.getImm());
+    O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
     return;
   }
 
@@ -147,7 +148,7 @@ void MipsInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
     Imm -= Offset;
     Imm &= (1 << Bits) - 1;
     Imm += Offset;
-    O << formatImm(Imm);
+    O << markup("<imm:") << formatImm(Imm) << markup(">");
     return;
   }
 
@@ -175,10 +176,12 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
     break;
   }
 
+  O << markup("<mem:");
   printOperand(MI, opNum+1, O);
   O << "(";
   printOperand(MI, opNum, O);
   O << ")";
+  O << markup(">");
 }
 
 void MipsInstPrinter::

From 9348903dcdbf5ba3db7e24afcfacfaa33b953535 Mon Sep 17 00:00:00 2001
From: Alain Carlucci <alain@rev.ng>
Date: Mon, 14 Dec 2020 15:36:12 +0100
Subject: [PATCH 235/244] InstPrinter: Improve X86 markup

---
 .../X86/MCTargetDesc/X86InstPrinterCommon.cpp |  8 +++++-
 .../X86/MCTargetDesc/X86IntelInstPrinter.cpp  | 27 ++++++++++---------
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index d8dbbbbf2779..47597b910362 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -301,6 +301,9 @@ void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
 
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isImm()) {
+
+    O << markup("<imm:");
+
     if (PrintBranchImmAsAddress) {
       uint64_t Target = Address + Op.getImm();
       if (MAI.getCodePointerSize() == 4)
@@ -308,6 +311,9 @@ void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
       O << formatHex(Target);
     } else
       O << formatImm(Op.getImm());
+
+    O << markup(">");
+
   } else {
     assert(Op.isExpr() && "unknown pcrel immediate operand");
     // If a symbolic branch target was added as a constant expression then print
@@ -315,7 +321,7 @@ void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, uint64_t Address,
     const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
     int64_t Address;
     if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
-      O << formatHex((uint64_t)Address);
+      O << markup("<imm:") << formatHex((uint64_t)Address) << markup(">");
     } else {
       // Otherwise, just print the expression.
       Op.getExpr()->print(O, &MAI);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index d5b205ad9a63..442c3e20dc30 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
 #include "X86GenAsmWriter1.inc"
 
 void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << getRegisterName(RegNo);
+  OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
 }
 
 void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address,
@@ -333,7 +333,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   if (Op.isReg()) {
     printRegName(O, Op.getReg());
   } else if (Op.isImm()) {
-    O << formatImm((int64_t)Op.getImm());
+    O << markup("<imm:") << formatImm((int64_t)Op.getImm()) << markup(">");
   } else {
     assert(Op.isExpr() && "unknown operand kind in printOperand");
     O << "offset ";
@@ -360,7 +360,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
   // If this has a segment register, print it.
   printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
 
-  O << '[';
+  O << markup("<mem:") << '[';
 
   bool NeedPlus = false;
   if (BaseReg.getReg()) {
@@ -391,28 +391,28 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
           DispVal = -DispVal;
         }
       }
-      O << formatImm(DispVal);
+      O << markup("<imm:") << formatImm(DispVal) << markup(">");
     }
   }
 
-  O << ']';
+  O << ']' << markup(">");
 }
 
 void X86IntelInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
                                       raw_ostream &O) {
   // If this has a segment register, print it.
   printOptionalSegReg(MI, Op + 1, O);
-  O << '[';
+  O << markup("<mem:") << '[';
   printOperand(MI, Op, O);
-  O << ']';
+  O << ']' << markup(">");
 }
 
 void X86IntelInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
                                       raw_ostream &O) {
   // DI accesses are always ES-based.
-  O << "es:[";
+  O << "es:" << markup("<mem:") << '[';
   printOperand(MI, Op, O);
-  O << ']';
+  O << ']' << markup(">");
 }
 
 void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
@@ -422,16 +422,16 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
   // If this has a segment register, print it.
   printOptionalSegReg(MI, Op + 1, O);
 
-  O << '[';
+  O << markup("<mem:") << '[';
 
   if (DispSpec.isImm()) {
-    O << formatImm(DispSpec.getImm());
+    O << markup("<imm:") << formatImm(DispSpec.getImm()) << markup(">");
   } else {
     assert(DispSpec.isExpr() && "non-immediate displacement?");
     DispSpec.getExpr()->print(O, &MAI);
   }
 
-  O << ']';
+  O << ']' << markup(">");
 }
 
 void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
@@ -439,7 +439,8 @@ void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
   if (MI->getOperand(Op).isExpr())
     return MI->getOperand(Op).getExpr()->print(O, &MAI);
 
-  O << formatImm(MI->getOperand(Op).getImm() & 0xff);
+  O << markup("<imm:") << formatImm(MI->getOperand(Op).getImm() & 0xff)
+    << markup(">");
 }
 
 void X86IntelInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,

From bd6c34f61d261dc9070f8244d121666ca4b9411e Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Wed, 27 Jan 2021 12:40:45 +0100
Subject: [PATCH 236/244] YAML::SequenceTraits: support custom inserter

---
 llvm/include/llvm/Support/YAMLTraits.h | 31 ++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 9ac9eb300983..ad938f6573c1 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -1111,16 +1111,42 @@ yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
   char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
 }
 
+template<typename T>
+class DefaultInserter {
+private:
+  IO &io;
+  T &Seq;
+
+public:
+  DefaultInserter(IO &io, T &Seq) : io(io), Seq(Seq) {}
+
+  auto &preflightElement(unsigned i) {
+    return SequenceTraits<T>::element(io, Seq, i);
+  }
+
+  void postflightElement(unsigned i) {};
+};
+
+template<typename T>
+auto getInserter(const T &) -> typename SequenceTraits<T>::Inserter {
+  return std::declval<typename SequenceTraits<T>::Inserter>();
+}
+
+template<typename T>
+DefaultInserter<T> getInserter(...) { return DefaultInserter<T>{}; }
+
 template <typename T, typename Context>
 std::enable_if_t<has_SequenceTraits<T>::value, void>
 yamlize(IO &io, T &Seq, bool, Context &Ctx) {
+  decltype(getInserter<T>(std::declval<T>())) I(io, Seq);
   if ( has_FlowTraits< SequenceTraits<T>>::value ) {
     unsigned incnt = io.beginFlowSequence();
     unsigned count = io.outputting() ? SequenceTraits<T>::size(io, Seq) : incnt;
     for(unsigned i=0; i < count; ++i) {
       void *SaveInfo;
       if ( io.preflightFlowElement(i, SaveInfo) ) {
-        yamlize(io, SequenceTraits<T>::element(io, Seq, i), true, Ctx);
+        yamlize(io, I.preflightElement(i), true, Ctx);
+        I.postflightElement(i);
         io.postflightFlowElement(SaveInfo);
       }
     }
@@ -1132,7 +1158,8 @@ yamlize(IO &io, T &Seq, bool, Context &Ctx) {
     for(unsigned i=0; i < count; ++i) {
       void *SaveInfo;
       if ( io.preflightElement(i, SaveInfo) ) {
-        yamlize(io, SequenceTraits<T>::element(io, Seq, i), true, Ctx);
+        yamlize(io, I.preflightElement(i), true, Ctx);
+        I.postflightElement(i);
         io.postflightElement(SaveInfo);
       }
     }

From a29124e3a99577d68597c6d2d49bd82729152509 Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Wed, 10 Feb 2021 10:32:11 +0100
Subject: [PATCH 237/244] YAMLTraits: support quoted EnumScalar

---
 llvm/include/llvm/Support/YAMLTraits.h | 28 +++++++++++++++++++-------
 llvm/lib/Support/YAMLTraits.cpp        |  7 ++++---
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index ad938f6573c1..3c8f913a9907 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -777,7 +777,9 @@ class IO {
   virtual void endFlowMapping() = 0;
 
   virtual void beginEnumScalar() = 0;
-  virtual bool matchEnumScalar(const char*, bool) = 0;
+  virtual bool matchEnumScalar(const char*,
+                               bool,
+                               QuotingType=QuotingType::None) = 0;
   virtual bool matchEnumFallback() = 0;
   virtual void endEnumScalar() = 0;
 
@@ -795,16 +797,24 @@ class IO {
   virtual void setAllowUnknownKeys(bool Allow);
 
   template <typename T>
-  void enumCase(T &Val, const char* Str, const T ConstVal) {
-    if ( matchEnumScalar(Str, outputting() && Val == ConstVal) ) {
+  void enumCase(T &Val,
+                const char* Str,
+                const T ConstVal,
+                QuotingType MustQuote=QuotingType::None) {
+    if ( matchEnumScalar(Str, outputting() && Val == ConstVal, MustQuote) ) {
       Val = ConstVal;
     }
   }
 
   // allow anonymous enum values to be used with LLVM_YAML_STRONG_TYPEDEF
   template <typename T>
-  void enumCase(T &Val, const char* Str, const uint32_t ConstVal) {
-    if ( matchEnumScalar(Str, outputting() && Val == static_cast<T>(ConstVal)) ) {
+  void enumCase(T &Val,
+                const char* Str,
+                const uint32_t ConstVal,
+                QuotingType MustQuote=QuotingType::None) {
+    if ( matchEnumScalar(Str,
+                         outputting() && Val == static_cast<T>(ConstVal),
+                         MustQuote) ) {
       Val = ConstVal;
     }
   }
@@ -1434,7 +1444,9 @@ class Input : public IO {
   void postflightFlowElement(void *) override;
   void endFlowSequence() override;
   void beginEnumScalar() override;
-  bool matchEnumScalar(const char*, bool) override;
+  bool matchEnumScalar(const char*,
+                       bool,
+                       QuotingType=QuotingType::None) override;
   bool matchEnumFallback() override;
   void endEnumScalar() override;
   bool beginBitSetScalar(bool &) override;
@@ -1590,7 +1602,9 @@ class Output : public IO {
   void postflightFlowElement(void *) override;
   void endFlowSequence() override;
   void beginEnumScalar() override;
-  bool matchEnumScalar(const char*, bool) override;
+  bool matchEnumScalar(const char*,
+                       bool,
+                       QuotingType=QuotingType::None) override;
   bool matchEnumFallback() override;
   void endEnumScalar() override;
   bool beginBitSetScalar(bool &) override;
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index aa6163a76161..799bfe8923aa 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -272,7 +272,7 @@ void Input::beginEnumScalar() {
   ScalarMatchFound = false;
 }
 
-bool Input::matchEnumScalar(const char *Str, bool) {
+bool Input::matchEnumScalar(const char *Str, bool, QuotingType MustQuote) {
   if (ScalarMatchFound)
     return false;
   if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
@@ -648,10 +648,11 @@ void Output::beginEnumScalar() {
   EnumerationMatchFound = false;
 }
 
-bool Output::matchEnumScalar(const char *Str, bool Match) {
+bool Output::matchEnumScalar(const char *Str, bool Match, QuotingType MustQuote) {
   if (Match && !EnumerationMatchFound) {
     newLineCheck();
-    outputUpToEndOfLine(Str);
+    StringRef String(Str);
+    scalarString(String, MustQuote);
     EnumerationMatchFound = true;
   }
   return false;

From 2cca6589f2219e69f62a6fbcc4b16d52872c5e7a Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Mon, 24 Feb 2020 14:29:20 +0100
Subject: [PATCH 238/244] Add support to Dominators for FilteredGraphTraits

Before this patch, LLVM's DominatorTree and related templates
(DominatorTree, DomTreeBuilder) did not support the computation of
dominator and post-dominator trees on graphs with markers, such as
GraphTraits<llvm::Inverse<T>>.
They only supported plain graph traits.

This patch restructures all the necessary templates, adding an
additional template argument (View), which allows DT and PDT to be
computed on all the graphs that use GraphTraits, potentially equipped
with a View.
A View is a marker that provides a different view on a graph that has
GraphTraits.
A typical example of this is llvm::Inverse.

This mechanism of View markers will be used in rev.ng to implement
filtered graph traits, to filter the edges of a graph implemented with
GraphTraits.
The new DT and PDT on the marked version of the GraphTraits will be
computed on the same graphs, but treating some edges as if they are seen
through the view.
For instance, the Post-Dominator Tree of a graph marked with the
llvm::Inverse view is the Dominator Tree.

The previous default behavior of DT and PDT is implemented by means of
the DTIdentityView, which simply leaves the node type of the GraphTraits
untouched.
This is a transparent view that allows all the code in LLVM to continue
working as it did before this patch.
---
 .../llvm/Analysis/IteratedDominanceFrontier.h |   2 +-
 llvm/include/llvm/Analysis/LoopInfo.h         |  11 +-
 llvm/include/llvm/CodeGen/LiveIntervalCalc.h  |   9 +-
 llvm/include/llvm/CodeGen/LiveRangeCalc.h     |  14 +-
 llvm/include/llvm/CodeGen/MachineDominators.h |   6 +-
 llvm/include/llvm/IR/Dominators.h             |  81 +++---
 llvm/include/llvm/Support/CFGDiff.h           |   4 +-
 llvm/include/llvm/Support/GenericDomTree.h    | 251 ++++++++++--------
 .../llvm/Support/GenericDomTreeConstruction.h |  88 +++---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  11 +-
 llvm/lib/Analysis/MemorySSAUpdater.cpp        |   6 +-
 llvm/lib/CodeGen/MachineDominators.cpp        |   4 +-
 llvm/lib/CodeGen/MachinePostDominators.cpp    |   2 +-
 llvm/lib/IR/Dominators.cpp                    |  52 ++--
 llvm/lib/Transforms/Vectorize/VPlan.cpp       |   2 +-
 .../IR/DominatorTreeBatchUpdatesTest.cpp      |   4 +-
 16 files changed, 310 insertions(+), 237 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index 8166b52aa226..41ae1a572140 100644
--- a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -73,7 +73,7 @@ ChildrenGetterTy<BasicBlock, IsPostDom>::get(const NodeRef &N) {
     return {Children.begin(), Children.end()};
   }
 
-  return GD->template getChildren<IsPostDom>(N);
+  return GD->template getChildren<IsPostDom, DTIdentityView>(N);
 }
 
 } // end of namespace IDFCalculatorDetail
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index a5717bae12c3..b0130611bad7 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -62,7 +62,16 @@ class MDNode;
 class MemorySSAUpdater;
 class ScalarEvolution;
 class raw_ostream;
-template <class N, bool IsPostDom> class DominatorTreeBase;
+
+template<typename X>
+using DTIdentityView = X;
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+class DominatorTreeOnView;
+
+template <typename NodeT, bool IsPostDom>
+using DominatorTreeBase = DominatorTreeOnView<NodeT, IsPostDom, DTIdentityView>;
+
 template <class N, class M> class LoopInfoBase;
 template <class N, class M> class LoopBase;
 
diff --git a/llvm/include/llvm/CodeGen/LiveIntervalCalc.h b/llvm/include/llvm/CodeGen/LiveIntervalCalc.h
index 76005e835595..ee2bf65a3657 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervalCalc.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervalCalc.h
@@ -21,7 +21,14 @@
 
 namespace llvm {
 
-template <class NodeT> class DomTreeNodeBase;
+template<typename X>
+using DTIdentityView = X;
+
+template <typename NodeT, template<typename> class View>
+class DomTreeNodeOnView;
+
+template <typename NodeT>
+using DomTreeNodeBase = DomTreeNodeOnView<NodeT, DTIdentityView>;
 
 using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
 
diff --git a/llvm/include/llvm/CodeGen/LiveRangeCalc.h b/llvm/include/llvm/CodeGen/LiveRangeCalc.h
index bbb6f2ddd233..f0cf1fad723b 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeCalc.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeCalc.h
@@ -36,13 +36,21 @@
 
 namespace llvm {
 
-template <class NodeT> class DomTreeNodeBase;
+template<typename X>
+using DTIdentityView = X;
+
+template <typename NodeT, template<typename> class View>
+class DomTreeNodeOnView;
+
+template <typename NodeT>
+using DomTreeNodeBase = DomTreeNodeOnView<NodeT, DTIdentityView>;
+
+using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
+
 class MachineDominatorTree;
 class MachineFunction;
 class MachineRegisterInfo;
 
-using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
-
 class LiveRangeCalc {
   const MachineFunction *MF = nullptr;
   const MachineRegisterInfo *MRI = nullptr;
diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h
index 46bf73cdd7b6..a96dea5985c0 100644
--- a/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -32,9 +32,9 @@ inline void DominatorTreeBase<MachineBasicBlock, false>::addRoot(
   this->Roots.push_back(MBB);
 }
 
-extern template class DomTreeNodeBase<MachineBasicBlock>;
-extern template class DominatorTreeBase<MachineBasicBlock, false>; // DomTree
-extern template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTree
+extern template class DomTreeNodeOnView<MachineBasicBlock, DTIdentityView>;
+extern template class DominatorTreeOnView<MachineBasicBlock, false, DTIdentityView>; // DomTree
+extern template class DominatorTreeOnView<MachineBasicBlock, true, DTIdentityView>; // PostDomTree
 
 using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
 
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h
index 08dbccaf2c01..de5a5306669d 100644
--- a/llvm/include/llvm/IR/Dominators.h
+++ b/llvm/include/llvm/IR/Dominators.h
@@ -32,50 +32,57 @@ class Instruction;
 class Module;
 class raw_ostream;
 
-extern template class DomTreeNodeBase<BasicBlock>;
-extern template class DominatorTreeBase<BasicBlock, false>; // DomTree
-extern template class DominatorTreeBase<BasicBlock, true>; // PostDomTree
+extern template class DomTreeNodeOnView<BasicBlock, DTIdentityView>;
+extern template class DominatorTreeOnView<BasicBlock, false, DTIdentityView>; // DomTree
+extern template class DominatorTreeOnView<BasicBlock, true, DTIdentityView>; // PostDomTree
 
 extern template class cfg::Update<BasicBlock *>;
 
 namespace DomTreeBuilder {
-using BBDomTree = DomTreeBase<BasicBlock>;
-using BBPostDomTree = PostDomTreeBase<BasicBlock>;
+template<template<typename> class View>
+using BBDomTreeOnView = DomTreeOnView<BasicBlock, View>;
+
+using BBDomTree = DomTreeOnView<BasicBlock, DTIdentityView>;
+
+template<template<typename> class View>
+using BBPostDomTreeOnView = PostDomTreeOnView<BasicBlock, View>;
+
+using BBPostDomTree = PostDomTreeOnView<BasicBlock, DTIdentityView>;
 
 using BBUpdates = ArrayRef<llvm::cfg::Update<BasicBlock *>>;
 
-using BBDomTreeGraphDiff = GraphDiff<BasicBlock *, false>;
-using BBPostDomTreeGraphDiff = GraphDiff<BasicBlock *, true>;
-
-extern template void Calculate<BBDomTree>(BBDomTree &DT);
-extern template void CalculateWithUpdates<BBDomTree>(BBDomTree &DT,
-                                                     BBUpdates U);
-
-extern template void Calculate<BBPostDomTree>(BBPostDomTree &DT);
-
-extern template void InsertEdge<BBDomTree>(BBDomTree &DT, BasicBlock *From,
-                                           BasicBlock *To);
-extern template void InsertEdge<BBPostDomTree>(BBPostDomTree &DT,
-                                               BasicBlock *From,
-                                               BasicBlock *To);
-
-extern template void DeleteEdge<BBDomTree>(BBDomTree &DT, BasicBlock *From,
-                                           BasicBlock *To);
-extern template void DeleteEdge<BBPostDomTree>(BBPostDomTree &DT,
-                                               BasicBlock *From,
-                                               BasicBlock *To);
-
-extern template void ApplyUpdates<BBDomTree>(BBDomTree &DT,
-                                             BBDomTreeGraphDiff &,
-                                             BBDomTreeGraphDiff *);
-extern template void ApplyUpdates<BBPostDomTree>(BBPostDomTree &DT,
-                                                 BBPostDomTreeGraphDiff &,
-                                                 BBPostDomTreeGraphDiff *);
-
-extern template bool Verify<BBDomTree>(const BBDomTree &DT,
-                                       BBDomTree::VerificationLevel VL);
-extern template bool Verify<BBPostDomTree>(const BBPostDomTree &DT,
-                                           BBPostDomTree::VerificationLevel VL);
+using BBDomTreeGraphDiff = GraphDiff<BBDomTree::NodePtr, false>;
+using BBPostDomTreeGraphDiff = GraphDiff<BBPostDomTree::NodePtr, true>;
+
+extern template void Calculate<BasicBlock, false, DTIdentityView>(BBDomTree &DT);
+extern template void
+CalculateWithUpdates<BasicBlock, false, DTIdentityView>(BBDomTree &DT, BBUpdates U);
+
+extern template void Calculate<BasicBlock, true, DTIdentityView>(BBPostDomTree &DT);
+
+extern template void
+InsertEdge<BasicBlock, false, DTIdentityView>(BBDomTree &DT, BasicBlock *From, BasicBlock *To);
+extern template void
+InsertEdge<BasicBlock, true, DTIdentityView>(BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
+
+extern template void
+DeleteEdge<BasicBlock, false, DTIdentityView>(BBDomTree &DT, BasicBlock *From, BasicBlock *To);
+extern template void
+DeleteEdge<BasicBlock, true, DTIdentityView>(BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
+
+extern template void
+ApplyUpdates<BasicBlock, false, DTIdentityView>(BBDomTree &DT,
+                                                BBDomTreeGraphDiff &,
+                                                BBDomTreeGraphDiff *);
+extern template void
+ApplyUpdates<BasicBlock, true, DTIdentityView>(BBPostDomTree &DT,
+                                               BBPostDomTreeGraphDiff &,
+                                               BBPostDomTreeGraphDiff *);
+
+extern template bool
+Verify<BasicBlock, false, DTIdentityView>(const BBDomTree &DT, BBDomTree::VerificationLevel VL);
+extern template bool
+Verify<BasicBlock, true, DTIdentityView>(const BBPostDomTree &DT, BBPostDomTree::VerificationLevel VL);
 }  // namespace DomTreeBuilder
 
 using DomTreeNode = DomTreeNodeBase<BasicBlock>;
diff --git a/llvm/include/llvm/Support/CFGDiff.h b/llvm/include/llvm/Support/CFGDiff.h
index c90b9aca78b5..07cf242d67a8 100644
--- a/llvm/include/llvm/Support/CFGDiff.h
+++ b/llvm/include/llvm/Support/CFGDiff.h
@@ -132,9 +132,9 @@ template <typename NodePtr, bool InverseGraph = false> class GraphDiff {
   }
 
   using VectRet = SmallVector<NodePtr, 8>;
-  template <bool InverseEdge> VectRet getChildren(NodePtr N) const {
+  template <bool InverseEdge, template <typename> class View> VectRet getChildren(NodePtr N) const {
     using DirectedNodeT =
-        std::conditional_t<InverseEdge, Inverse<NodePtr>, NodePtr>;
+        std::conditional_t<InverseEdge, Inverse<View<NodePtr>>, View<NodePtr>>;
     auto R = children<DirectedNodeT>(N);
     VectRet Res = VectRet(detail::reverse_if<!InverseEdge>(R));
 
diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h
index 18e08dbcd175..0bb6c5749a56 100644
--- a/llvm/include/llvm/Support/GenericDomTree.h
+++ b/llvm/include/llvm/Support/GenericDomTree.h
@@ -41,44 +41,51 @@
 
 namespace llvm {
 
+template<typename X>
+using DTIdentityView = X;
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+class DominatorTreeOnView;
+
 template <typename NodeT, bool IsPostDom>
-class DominatorTreeBase;
+using DominatorTreeBase = DominatorTreeOnView<NodeT, IsPostDom, DTIdentityView>;
 
 namespace DomTreeBuilder {
-template <typename DomTreeT>
-struct SemiNCAInfo;
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+struct SemiNCAInfoOnView;
 }  // namespace DomTreeBuilder
 
 /// Base class for the actual dominator tree node.
-template <class NodeT> class DomTreeNodeBase {
+template <class NodeT, template<typename> class View>
+class DomTreeNodeOnView {
   friend class PostDominatorTree;
-  friend class DominatorTreeBase<NodeT, false>;
-  friend class DominatorTreeBase<NodeT, true>;
-  friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase<NodeT, false>>;
-  friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase<NodeT, true>>;
+  friend class DominatorTreeOnView<NodeT, false, View>;
+  friend class DominatorTreeOnView<NodeT, true, View>;
+  friend struct DomTreeBuilder::SemiNCAInfoOnView<NodeT, false, View>;
+  friend struct DomTreeBuilder::SemiNCAInfoOnView<NodeT, true, View>;
 
   NodeT *TheBB;
-  DomTreeNodeBase *IDom;
+  DomTreeNodeOnView *IDom;
   unsigned Level;
-  SmallVector<DomTreeNodeBase *, 4> Children;
+  SmallVector<DomTreeNodeOnView *, 4> Children;
   mutable unsigned DFSNumIn = ~0;
   mutable unsigned DFSNumOut = ~0;
 
  public:
-  DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom)
+  DomTreeNodeOnView(NodeT *BB, DomTreeNodeOnView *iDom)
       : TheBB(BB), IDom(iDom), Level(IDom ? IDom->Level + 1 : 0) {}
 
-  using iterator = typename SmallVector<DomTreeNodeBase *, 4>::iterator;
+  using iterator = typename SmallVector<DomTreeNodeOnView *, 4>::iterator;
   using const_iterator =
-      typename SmallVector<DomTreeNodeBase *, 4>::const_iterator;
+      typename SmallVector<DomTreeNodeOnView *, 4>::const_iterator;
 
   iterator begin() { return Children.begin(); }
   iterator end() { return Children.end(); }
   const_iterator begin() const { return Children.begin(); }
   const_iterator end() const { return Children.end(); }
 
-  DomTreeNodeBase *const &back() const { return Children.back(); }
-  DomTreeNodeBase *&back() { return Children.back(); }
+  DomTreeNodeOnView *const &back() const { return Children.back(); }
+  DomTreeNodeOnView *&back() { return Children.back(); }
 
   iterator_range<iterator> children() { return make_range(begin(), end()); }
   iterator_range<const_iterator> children() const {
@@ -86,11 +93,11 @@ template <class NodeT> class DomTreeNodeBase {
   }
 
   NodeT *getBlock() const { return TheBB; }
-  DomTreeNodeBase *getIDom() const { return IDom; }
+  DomTreeNodeOnView *getIDom() const { return IDom; }
   unsigned getLevel() const { return Level; }
 
-  std::unique_ptr<DomTreeNodeBase> addChild(
-      std::unique_ptr<DomTreeNodeBase> C) {
+  std::unique_ptr<DomTreeNodeOnView> addChild(
+      std::unique_ptr<DomTreeNodeOnView> C) {
     Children.push_back(C.get());
     return C;
   }
@@ -100,19 +107,19 @@ template <class NodeT> class DomTreeNodeBase {
 
   void clearAllChildren() { Children.clear(); }
 
-  bool compare(const DomTreeNodeBase *Other) const {
+  bool compare(const DomTreeNodeOnView *Other) const {
     if (getNumChildren() != Other->getNumChildren())
       return true;
 
     if (Level != Other->Level) return true;
 
     SmallPtrSet<const NodeT *, 4> OtherChildren;
-    for (const DomTreeNodeBase *I : *Other) {
+    for (const DomTreeNodeOnView *I : *Other) {
       const NodeT *Nd = I->getBlock();
       OtherChildren.insert(Nd);
     }
 
-    for (const DomTreeNodeBase *I : *this) {
+    for (const DomTreeNodeOnView *I : *this) {
       const NodeT *N = I->getBlock();
       if (OtherChildren.count(N) == 0)
         return true;
@@ -120,7 +127,7 @@ template <class NodeT> class DomTreeNodeBase {
     return false;
   }
 
-  void setIDom(DomTreeNodeBase *NewIDom) {
+  void setIDom(DomTreeNodeOnView *NewIDom) {
     assert(IDom && "No immediate dominator?");
     if (IDom == NewIDom) return;
 
@@ -146,7 +153,7 @@ template <class NodeT> class DomTreeNodeBase {
 private:
   // Return true if this node is dominated by other. Use this only if DFS info
   // is valid.
-  bool DominatedBy(const DomTreeNodeBase *other) const {
+  bool DominatedBy(const DomTreeNodeOnView *other) const {
     return this->DFSNumIn >= other->DFSNumIn &&
            this->DFSNumOut <= other->DFSNumOut;
   }
@@ -155,13 +162,13 @@ template <class NodeT> class DomTreeNodeBase {
     assert(IDom);
     if (Level == IDom->Level + 1) return;
 
-    SmallVector<DomTreeNodeBase *, 64> WorkStack = {this};
+    SmallVector<DomTreeNodeOnView *, 64> WorkStack = {this};
 
     while (!WorkStack.empty()) {
-      DomTreeNodeBase *Current = WorkStack.pop_back_val();
+      DomTreeNodeOnView *Current = WorkStack.pop_back_val();
       Current->Level = Current->IDom->Level + 1;
 
-      for (DomTreeNodeBase *C : *Current) {
+      for (DomTreeNodeOnView *C : *Current) {
         assert(C->IDom);
         if (C->Level != C->IDom->Level + 1) WorkStack.push_back(C);
       }
@@ -170,7 +177,10 @@ template <class NodeT> class DomTreeNodeBase {
 };
 
 template <class NodeT>
-raw_ostream &operator<<(raw_ostream &O, const DomTreeNodeBase<NodeT> *Node) {
+using DomTreeNodeBase = DomTreeNodeOnView<NodeT, DTIdentityView>;
+
+template <class NodeT, template<typename> class View>
+raw_ostream &operator<<(raw_ostream &O, const DomTreeNodeOnView<NodeT, View> *Node) {
   if (Node->getBlock())
     Node->getBlock()->printAsOperand(O, false);
   else
@@ -182,11 +192,11 @@ raw_ostream &operator<<(raw_ostream &O, const DomTreeNodeBase<NodeT> *Node) {
   return O;
 }
 
-template <class NodeT>
-void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &O,
+template <class NodeT, template<typename> class View>
+void PrintDomTree(const DomTreeNodeOnView<NodeT, View> *N, raw_ostream &O,
                   unsigned Lev) {
   O.indent(2 * Lev) << "[" << Lev << "] " << N;
-  for (typename DomTreeNodeBase<NodeT>::const_iterator I = N->begin(),
+  for (typename DomTreeNodeOnView<NodeT, View>::const_iterator I = N->begin(),
                                                        E = N->end();
        I != E; ++I)
     PrintDomTree<NodeT>(*I, O, Lev + 1);
@@ -194,41 +204,42 @@ void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &O,
 
 namespace DomTreeBuilder {
 // The routines below are provided in a separate header but referenced here.
-template <typename DomTreeT>
-void Calculate(DomTreeT &DT);
-
-template <typename DomTreeT>
-void CalculateWithUpdates(DomTreeT &DT,
-                          ArrayRef<typename DomTreeT::UpdateType> Updates);
-
-template <typename DomTreeT>
-void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
-                typename DomTreeT::NodePtr To);
-
-template <typename DomTreeT>
-void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
-                typename DomTreeT::NodePtr To);
-
-template <typename DomTreeT>
-void ApplyUpdates(DomTreeT &DT,
-                  GraphDiff<typename DomTreeT::NodePtr,
-                            DomTreeT::IsPostDominator> &PreViewCFG,
-                  GraphDiff<typename DomTreeT::NodePtr,
-                            DomTreeT::IsPostDominator> *PostViewCFG);
-
-template <typename DomTreeT>
-bool Verify(const DomTreeT &DT, typename DomTreeT::VerificationLevel VL);
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void Calculate(DominatorTreeOnView<NodeT, IsPostDom, View> &DT);
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void CalculateWithUpdates(DominatorTreeOnView<NodeT, IsPostDom, View> &DT,
+                          ArrayRef<typename DominatorTreeOnView<NodeT, IsPostDom, View>::UpdateType> Updates);
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void InsertEdge(DominatorTreeOnView<NodeT, IsPostDom, View> &DT,
+                typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr From,
+                typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr To);
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void DeleteEdge(DominatorTreeOnView<NodeT, IsPostDom, View> &DT,
+                typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr From,
+                typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr To);
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void ApplyUpdates(DominatorTreeOnView<NodeT, IsPostDom, View> &DT,
+                  GraphDiff<typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr, IsPostDom> &PreViewCFG,
+                  GraphDiff<typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr, IsPostDom> *PostViewCFG);
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+bool Verify(const DominatorTreeOnView<NodeT, IsPostDom, View> &DT,
+            typename DominatorTreeOnView<NodeT, IsPostDom, View>::VerificationLevel VL);
 }  // namespace DomTreeBuilder
 
 /// Core dominator tree base class.
 ///
 /// This class is a generic template over graph nodes. It is instantiated for
 /// various graphs in the LLVM IR or in the code generator.
-template <typename NodeT, bool IsPostDom>
-class DominatorTreeBase {
+template <typename NodeT, bool IsPostDom, template<typename X> class View>
+class DominatorTreeOnView {
  public:
   static_assert(std::is_pointer<typename GraphTraits<NodeT *>::NodeRef>::value,
-                "Currently DominatorTreeBase supports only pointer nodes");
+                "Currently DominatorTreeOnView supports only pointer nodes");
   using NodeType = NodeT;
   using NodePtr = NodeT *;
   using ParentPtr = decltype(std::declval<NodeT *>()->getParent());
@@ -249,20 +260,21 @@ class DominatorTreeBase {
   SmallVector<NodeT *, IsPostDom ? 4 : 1> Roots;
 
   using DomTreeNodeMapType =
-     DenseMap<NodeT *, std::unique_ptr<DomTreeNodeBase<NodeT>>>;
+     DenseMap<NodeT *, std::unique_ptr<DomTreeNodeOnView<NodeT, View>>>;
   DomTreeNodeMapType DomTreeNodes;
-  DomTreeNodeBase<NodeT> *RootNode = nullptr;
+  DomTreeNodeOnView<NodeT, View> *RootNode = nullptr;
   ParentPtr Parent = nullptr;
 
   mutable bool DFSInfoValid = false;
   mutable unsigned int SlowQueries = 0;
 
-  friend struct DomTreeBuilder::SemiNCAInfo<DominatorTreeBase>;
+  friend struct DomTreeBuilder::SemiNCAInfoOnView<NodeT, true, View>;
+  friend struct DomTreeBuilder::SemiNCAInfoOnView<NodeT, false, View>;
 
  public:
-  DominatorTreeBase() {}
+  DominatorTreeOnView() {}
 
-  DominatorTreeBase(DominatorTreeBase &&Arg)
+  DominatorTreeOnView(DominatorTreeOnView &&Arg)
       : Roots(std::move(Arg.Roots)),
         DomTreeNodes(std::move(Arg.DomTreeNodes)),
         RootNode(Arg.RootNode),
@@ -272,7 +284,7 @@ class DominatorTreeBase {
     Arg.wipe();
   }
 
-  DominatorTreeBase &operator=(DominatorTreeBase &&RHS) {
+  DominatorTreeOnView &operator=(DominatorTreeOnView &&RHS) {
     Roots = std::move(RHS.Roots);
     DomTreeNodes = std::move(RHS.DomTreeNodes);
     RootNode = RHS.RootNode;
@@ -283,8 +295,8 @@ class DominatorTreeBase {
     return *this;
   }
 
-  DominatorTreeBase(const DominatorTreeBase &) = delete;
-  DominatorTreeBase &operator=(const DominatorTreeBase &) = delete;
+  DominatorTreeOnView(const DominatorTreeOnView &) = delete;
+  DominatorTreeOnView &operator=(const DominatorTreeOnView &) = delete;
 
   /// Iteration over roots.
   ///
@@ -314,7 +326,7 @@ class DominatorTreeBase {
 
   /// compare - Return false if the other dominator tree base matches this
   /// dominator tree base. Otherwise return true.
-  bool compare(const DominatorTreeBase &Other) const {
+  bool compare(const DominatorTreeOnView &Other) const {
     if (Parent != Other.Parent) return true;
 
     if (Roots.size() != Other.Roots.size())
@@ -334,8 +346,8 @@ class DominatorTreeBase {
       if (OI == OtherDomTreeNodes.end())
         return true;
 
-      DomTreeNodeBase<NodeT> &MyNd = *DomTreeNode.second;
-      DomTreeNodeBase<NodeT> &OtherNd = *OI->second;
+      DomTreeNodeOnView<NodeT, View> &MyNd = *DomTreeNode.second;
+      DomTreeNodeOnView<NodeT, View> &OtherNd = *OI->second;
 
       if (MyNd.compare(&OtherNd))
         return true;
@@ -348,7 +360,7 @@ class DominatorTreeBase {
   /// block.  This is the same as using operator[] on this class.  The result
   /// may (but is not required to) be null for a forward (backwards)
   /// statically unreachable block.
-  DomTreeNodeBase<NodeT> *getNode(const NodeT *BB) const {
+  DomTreeNodeOnView<NodeT, View> *getNode(const NodeT *BB) const {
     auto I = DomTreeNodes.find(BB);
     if (I != DomTreeNodes.end())
       return I->second.get();
@@ -356,7 +368,7 @@ class DominatorTreeBase {
   }
 
   /// See getNode.
-  DomTreeNodeBase<NodeT> *operator[](const NodeT *BB) const {
+  DomTreeNodeOnView<NodeT, View> *operator[](const NodeT *BB) const {
     return getNode(BB);
   }
 
@@ -367,20 +379,20 @@ class DominatorTreeBase {
   /// post-dominance information must be capable of dealing with this
   /// possibility.
   ///
-  DomTreeNodeBase<NodeT> *getRootNode() { return RootNode; }
-  const DomTreeNodeBase<NodeT> *getRootNode() const { return RootNode; }
+  DomTreeNodeOnView<NodeT, View> *getRootNode() { return RootNode; }
+  const DomTreeNodeOnView<NodeT, View> *getRootNode() const { return RootNode; }
 
   /// Get all nodes dominated by R, including R itself.
   void getDescendants(NodeT *R, SmallVectorImpl<NodeT *> &Result) const {
     Result.clear();
-    const DomTreeNodeBase<NodeT> *RN = getNode(R);
+    const DomTreeNodeOnView<NodeT, View> *RN = getNode(R);
     if (!RN)
       return; // If R is unreachable, it will not be present in the DOM tree.
-    SmallVector<const DomTreeNodeBase<NodeT> *, 8> WL;
+    SmallVector<const DomTreeNodeOnView<NodeT, View> *, 8> WL;
     WL.push_back(RN);
 
     while (!WL.empty()) {
-      const DomTreeNodeBase<NodeT> *N = WL.pop_back_val();
+      const DomTreeNodeOnView<NodeT, View> *N = WL.pop_back_val();
       Result.push_back(N->getBlock());
       WL.append(N->begin(), N->end());
     }
@@ -389,8 +401,8 @@ class DominatorTreeBase {
   /// properlyDominates - Returns true iff A dominates B and A != B.
   /// Note that this is not a constant time operation!
   ///
-  bool properlyDominates(const DomTreeNodeBase<NodeT> *A,
-                         const DomTreeNodeBase<NodeT> *B) const {
+  bool properlyDominates(const DomTreeNodeOnView<NodeT, View> *A,
+                         const DomTreeNodeOnView<NodeT, View> *B) const {
     if (!A || !B)
       return false;
     if (A == B)
@@ -408,13 +420,13 @@ class DominatorTreeBase {
     return isReachableFromEntry(getNode(const_cast<NodeT *>(A)));
   }
 
-  bool isReachableFromEntry(const DomTreeNodeBase<NodeT> *A) const { return A; }
+  bool isReachableFromEntry(const DomTreeNodeOnView<NodeT, View> *A) const { return A; }
 
   /// dominates - Returns true iff A dominates B.  Note that this is not a
   /// constant time operation!
   ///
-  bool dominates(const DomTreeNodeBase<NodeT> *A,
-                 const DomTreeNodeBase<NodeT> *B) const {
+  bool dominates(const DomTreeNodeOnView<NodeT, View> *A,
+                 const DomTreeNodeOnView<NodeT, View> *B) const {
     // A node trivially dominates itself.
     if (B == A)
       return true;
@@ -478,8 +490,8 @@ class DominatorTreeBase {
         return &Entry;
     }
 
-    DomTreeNodeBase<NodeT> *NodeA = getNode(A);
-    DomTreeNodeBase<NodeT> *NodeB = getNode(B);
+    DomTreeNodeOnView<NodeT, View> *NodeA = getNode(A);
+    DomTreeNodeOnView<NodeT, View> *NodeB = getNode(B);
     assert(NodeA && "A must be in the tree");
     assert(NodeB && "B must be in the tree");
 
@@ -502,7 +514,7 @@ class DominatorTreeBase {
                                       const_cast<NodeT *>(B));
   }
 
-  bool isVirtualRoot(const DomTreeNodeBase<NodeT> *A) const {
+  bool isVirtualRoot(const DomTreeNodeOnView<NodeT, View> *A) const {
     return isPostDominator() && !A->getBlock();
   }
 
@@ -544,7 +556,7 @@ class DominatorTreeBase {
   void applyUpdates(ArrayRef<UpdateType> Updates) {
     GraphDiff<NodePtr, IsPostDominator> PreViewCFG(
         Updates, /*ReverseApplyUpdates=*/true);
-    DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr);
+    DomTreeBuilder::ApplyUpdates<NodeT, IsPostDom, View>(*this, PreViewCFG, nullptr);
   }
 
   /// \param Updates An unordered sequence of updates to perform. The current
@@ -556,7 +568,7 @@ class DominatorTreeBase {
                     ArrayRef<UpdateType> PostViewUpdates) {
     if (Updates.empty()) {
       GraphDiff<NodePtr, IsPostDom> PostViewCFG(PostViewUpdates);
-      DomTreeBuilder::ApplyUpdates(*this, PostViewCFG, &PostViewCFG);
+      DomTreeBuilder::ApplyUpdates<NodeT, IsPostDom, View>(*this, PostViewCFG, &PostViewCFG);
     } else {
       // PreViewCFG needs to merge Updates and PostViewCFG. The updates in
       // Updates need to be reversed, and match the direction in PostViewCFG.
@@ -569,7 +581,7 @@ class DominatorTreeBase {
       GraphDiff<NodePtr, IsPostDom> PreViewCFG(AllUpdates,
                                                /*ReverseApplyUpdates=*/true);
       GraphDiff<NodePtr, IsPostDom> PostViewCFG(PostViewUpdates);
-      DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, &PostViewCFG);
+      DomTreeBuilder::ApplyUpdates<NodeT, IsPostDom, View>(*this, PreViewCFG, &PostViewCFG);
     }
   }
 
@@ -617,9 +629,9 @@ class DominatorTreeBase {
   /// \param DomBB CFG node that is dominator for BB.
   /// \returns New dominator tree node that represents new CFG node.
   ///
-  DomTreeNodeBase<NodeT> *addNewBlock(NodeT *BB, NodeT *DomBB) {
+  DomTreeNodeOnView<NodeT, View> *addNewBlock(NodeT *BB, NodeT *DomBB) {
     assert(getNode(BB) == nullptr && "Block already in dominator tree!");
-    DomTreeNodeBase<NodeT> *IDomNode = getNode(DomBB);
+    DomTreeNodeOnView<NodeT, View> *IDomNode = getNode(DomBB);
     assert(IDomNode && "Not immediate dominator specified for block!");
     DFSInfoValid = false;
     return createChild(BB, IDomNode);
@@ -630,12 +642,12 @@ class DominatorTreeBase {
   /// \param BB New node in CFG.
   /// \returns New dominator tree node that represents new CFG node.
   ///
-  DomTreeNodeBase<NodeT> *setNewRoot(NodeT *BB) {
+  DomTreeNodeOnView<NodeT, View> *setNewRoot(NodeT *BB) {
     assert(getNode(BB) == nullptr && "Block already in dominator tree!");
     assert(!this->isPostDominator() &&
            "Cannot change root of post-dominator tree");
     DFSInfoValid = false;
-    DomTreeNodeBase<NodeT> *NewNode = createNode(BB);
+    DomTreeNodeOnView<NodeT, View> *NewNode = createNode(BB);
     if (Roots.empty()) {
       addRoot(BB);
     } else {
@@ -653,8 +665,8 @@ class DominatorTreeBase {
   /// changeImmediateDominator - This method is used to update the dominator
   /// tree information when a node's immediate dominator changes.
   ///
-  void changeImmediateDominator(DomTreeNodeBase<NodeT> *N,
-                                DomTreeNodeBase<NodeT> *NewIDom) {
+  void changeImmediateDominator(DomTreeNodeOnView<NodeT, View> *N,
+                                DomTreeNodeOnView<NodeT, View> *NewIDom) {
     assert(N && NewIDom && "Cannot change null node pointers!");
     DFSInfoValid = false;
     N->setIDom(NewIDom);
@@ -668,14 +680,14 @@ class DominatorTreeBase {
   /// dominate any other blocks. Removes node from its immediate dominator's
   /// children list. Deletes dominator node associated with basic block BB.
   void eraseNode(NodeT *BB) {
-    DomTreeNodeBase<NodeT> *Node = getNode(BB);
+    DomTreeNodeOnView<NodeT, View> *Node = getNode(BB);
     assert(Node && "Removing node that isn't in dominator tree.");
     assert(Node->isLeaf() && "Node is not a leaf node.");
 
     DFSInfoValid = false;
 
     // Remove node from immediate dominator's children list.
-    DomTreeNodeBase<NodeT> *IDom = Node->getIDom();
+    DomTreeNodeOnView<NodeT, View> *IDom = Node->getIDom();
     if (IDom) {
       const auto I = find(IDom->Children, Node);
       assert(I != IDom->Children.end() &&
@@ -700,9 +712,9 @@ class DominatorTreeBase {
   /// tree to reflect this change.
   void splitBlock(NodeT *NewBB) {
     if (IsPostDominator)
-      Split<Inverse<NodeT *>>(NewBB);
+      Split<Inverse<View<NodeT *>>>(NewBB);
     else
-      Split<NodeT *>(NewBB);
+      Split<View<NodeT *>>(NewBB);
   }
 
   /// print - Convert to human readable form
@@ -736,11 +748,11 @@ class DominatorTreeBase {
       return;
     }
 
-    SmallVector<std::pair<const DomTreeNodeBase<NodeT> *,
-                          typename DomTreeNodeBase<NodeT>::const_iterator>,
+    SmallVector<std::pair<const DomTreeNodeOnView<NodeT, View> *,
+                          typename DomTreeNodeOnView<NodeT, View>::const_iterator>,
                 32> WorkStack;
 
-    const DomTreeNodeBase<NodeT> *ThisRoot = getRootNode();
+    const DomTreeNodeOnView<NodeT, View> *ThisRoot = getRootNode();
     assert((!Parent || ThisRoot) && "Empty constructed DomTree");
     if (!ThisRoot)
       return;
@@ -753,7 +765,7 @@ class DominatorTreeBase {
     ThisRoot->DFSNumIn = DFSNum++;
 
     while (!WorkStack.empty()) {
-      const DomTreeNodeBase<NodeT> *Node = WorkStack.back().first;
+      const DomTreeNodeOnView<NodeT, View> *Node = WorkStack.back().first;
       const auto ChildIt = WorkStack.back().second;
 
       // If we visited all of the children of this node, "recurse" back up the
@@ -763,7 +775,7 @@ class DominatorTreeBase {
         WorkStack.pop_back();
       } else {
         // Otherwise, recursively visit this child.
-        const DomTreeNodeBase<NodeT> *Child = *ChildIt;
+        const DomTreeNodeOnView<NodeT, View> *Child = *ChildIt;
         ++WorkStack.back().second;
 
         WorkStack.push_back({Child, Child->begin()});
@@ -816,15 +828,16 @@ class DominatorTreeBase {
 protected:
   void addRoot(NodeT *BB) { this->Roots.push_back(BB); }
 
-  DomTreeNodeBase<NodeT> *createChild(NodeT *BB, DomTreeNodeBase<NodeT> *IDom) {
+  DomTreeNodeOnView<NodeT, View> *
+  createChild(NodeT *BB, DomTreeNodeOnView<NodeT, View> *IDom) {
     return (DomTreeNodes[BB] = IDom->addChild(
-                std::make_unique<DomTreeNodeBase<NodeT>>(BB, IDom)))
+                std::make_unique<DomTreeNodeOnView<NodeT, View>>(BB, IDom)))
         .get();
   }
 
-  DomTreeNodeBase<NodeT> *createNode(NodeT *BB) {
+  DomTreeNodeOnView<NodeT, View> *createNode(NodeT *BB) {
     return (DomTreeNodes[BB] =
-                std::make_unique<DomTreeNodeBase<NodeT>>(BB, nullptr))
+                std::make_unique<DomTreeNodeOnView<NodeT, View>>(BB, nullptr))
         .get();
   }
 
@@ -873,25 +886,25 @@ class DominatorTreeBase {
     }
 
     // Create the new dominator tree node... and set the idom of NewBB.
-    DomTreeNodeBase<NodeT> *NewBBNode = addNewBlock(NewBB, NewBBIDom);
+    DomTreeNodeOnView<NodeT, View> *NewBBNode = addNewBlock(NewBB, NewBBIDom);
 
     // If NewBB strictly dominates other blocks, then it is now the immediate
     // dominator of NewBBSucc.  Update the dominator tree as appropriate.
     if (NewBBDominatesNewBBSucc) {
-      DomTreeNodeBase<NodeT> *NewBBSuccNode = getNode(NewBBSucc);
+      DomTreeNodeOnView<NodeT, View> *NewBBSuccNode = getNode(NewBBSucc);
       changeImmediateDominator(NewBBSuccNode, NewBBNode);
     }
   }
 
  private:
-  bool dominatedBySlowTreeWalk(const DomTreeNodeBase<NodeT> *A,
-                               const DomTreeNodeBase<NodeT> *B) const {
+  bool dominatedBySlowTreeWalk(const DomTreeNodeOnView<NodeT, View> *A,
+                               const DomTreeNodeOnView<NodeT, View> *B) const {
     assert(A != B);
     assert(isReachableFromEntry(B));
     assert(isReachableFromEntry(A));
 
     const unsigned ALevel = A->getLevel();
-    const DomTreeNodeBase<NodeT> *IDom;
+    const DomTreeNodeOnView<NodeT, View> *IDom;
 
     // Don't walk nodes above A's subtree. When we reach A's level, we must
     // either find A or be in some other subtree not dominated by A.
@@ -912,16 +925,22 @@ class DominatorTreeBase {
   }
 };
 
+template <typename T, template<typename> class View>
+using DomTreeOnView = DominatorTreeOnView<T, false, View>;
+
 template <typename T>
-using DomTreeBase = DominatorTreeBase<T, false>;
+using DomTreeBase = DomTreeOnView<T, DTIdentityView>;
+
+template <typename T, template<typename> class View>
+using PostDomTreeOnView = DominatorTreeOnView<T, true, View>;
 
 template <typename T>
-using PostDomTreeBase = DominatorTreeBase<T, true>;
+using PostDomTreeBase = PostDomTreeOnView<T, DTIdentityView>;
 
 // These two functions are declared out of line as a workaround for building
 // with old (< r147295) versions of clang because of pr11642.
-template <typename NodeT, bool IsPostDom>
-bool DominatorTreeBase<NodeT, IsPostDom>::dominates(const NodeT *A,
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+bool DominatorTreeOnView<NodeT, IsPostDom, View>::dominates(const NodeT *A,
                                                     const NodeT *B) const {
   if (A == B)
     return true;
@@ -932,8 +951,8 @@ bool DominatorTreeBase<NodeT, IsPostDom>::dominates(const NodeT *A,
   return dominates(getNode(const_cast<NodeT *>(A)),
                    getNode(const_cast<NodeT *>(B)));
 }
-template <typename NodeT, bool IsPostDom>
-bool DominatorTreeBase<NodeT, IsPostDom>::properlyDominates(
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+bool DominatorTreeOnView<NodeT, IsPostDom, View>::properlyDominates(
     const NodeT *A, const NodeT *B) const {
   if (A == B)
     return false;
diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 4b59ad1f017f..6497fc3f75bf 100644
--- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -51,13 +51,13 @@
 namespace llvm {
 namespace DomTreeBuilder {
 
-template <typename DomTreeT>
-struct SemiNCAInfo {
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+struct SemiNCAInfoOnView {
+  using DomTreeT = DominatorTreeOnView<NodeT, IsPostDom, View>;
   using NodePtr = typename DomTreeT::NodePtr;
-  using NodeT = typename DomTreeT::NodeType;
-  using TreeNodePtr = DomTreeNodeBase<NodeT> *;
+  using TreeNode = DomTreeNodeOnView<NodeT, View>;
+  using TreeNodePtr = TreeNode*;
   using RootsT = decltype(DomTreeT::Roots);
-  static constexpr bool IsPostDom = DomTreeT::IsPostDominator;
   using GraphDiffT = GraphDiff<NodePtr, IsPostDom>;
 
   // Information record used by Semi-NCA during tree construction.
@@ -95,7 +95,7 @@ struct SemiNCAInfo {
   using BatchUpdatePtr = BatchUpdateInfo *;
 
   // If BUI is a nullptr, then there's no batch update in progress.
-  SemiNCAInfo(BatchUpdatePtr BUI) : BatchUpdates(BUI) {}
+  SemiNCAInfoOnView(BatchUpdatePtr BUI) : BatchUpdates(BUI) {}
 
   void clear() {
     NumToNode = {nullptr}; // Restore to initial state with a dummy start node.
@@ -107,14 +107,14 @@ struct SemiNCAInfo {
   template <bool Inversed>
   static SmallVector<NodePtr, 8> getChildren(NodePtr N, BatchUpdatePtr BUI) {
     if (BUI)
-      return BUI->PreViewCFG.template getChildren<Inversed>(N);
+      return BUI->PreViewCFG.template getChildren<Inversed, View>(N);
     return getChildren<Inversed>(N);
   }
 
   template <bool Inversed>
   static SmallVector<NodePtr, 8> getChildren(NodePtr N) {
     using DirectedNodeT =
-        std::conditional_t<Inversed, Inverse<NodePtr>, NodePtr>;
+        std::conditional_t<Inversed, Inverse<View<NodePtr>>, View<NodePtr>>;
     auto R = children<DirectedNodeT>(N);
     SmallVector<NodePtr, 8> Res(detail::reverse_if<!Inversed>(R));
 
@@ -357,7 +357,7 @@ struct SemiNCAInfo {
       return Roots;
     }
 
-    SemiNCAInfo SNCA(BUI);
+    SemiNCAInfoOnView SNCA(BUI);
 
     // PostDominatorTree always has a virtual root.
     SNCA.addVirtualRoot();
@@ -510,7 +510,7 @@ struct SemiNCAInfo {
     assert(IsPostDom && "This function is for postdominators only");
     LLVM_DEBUG(dbgs() << "Removing redundant roots\n");
 
-    SemiNCAInfo SNCA(BUI);
+    SemiNCAInfoOnView SNCA(BUI);
 
     for (unsigned i = 0; i < Roots.size(); ++i) {
       auto &Root = Roots[i];
@@ -571,7 +571,7 @@ struct SemiNCAInfo {
     }
     // This is rebuilding the whole tree, not incrementally, but PostViewBUI is
     // used in case the caller needs a DT update with a CFGView.
-    SemiNCAInfo SNCA(PostViewBUI);
+    SemiNCAInfoOnView SNCA(PostViewBUI);
 
     // Step #0: Number blocks in depth-first order and initialize variables used
     // in later stages of the algorithm.
@@ -904,7 +904,7 @@ struct SemiNCAInfo {
       return false;
     };
 
-    SemiNCAInfo SNCA(BUI);
+    SemiNCAInfoOnView SNCA(BUI);
     SNCA.runDFS(Root, 0, UnreachableDescender, 0);
     SNCA.runSemiNCA(DT);
     SNCA.attachNewSubtree(DT, Incoming);
@@ -997,7 +997,7 @@ struct SemiNCAInfo {
     LLVM_DEBUG(dbgs() << "\tTop of subtree: " << BlockNamePrinter(ToIDomTN)
                       << "\n");
 
-    SemiNCAInfo SNCA(BUI);
+    SemiNCAInfoOnView SNCA(BUI);
     SNCA.runDFS(ToIDom, 0, DescendBelow, 0);
     LLVM_DEBUG(dbgs() << "\tRunning Semi-NCA\n");
     SNCA.runSemiNCA(DT, Level);
@@ -1064,7 +1064,7 @@ struct SemiNCAInfo {
       return false;
     };
 
-    SemiNCAInfo SNCA(BUI);
+    SemiNCAInfoOnView SNCA(BUI);
     unsigned LastDFSNum =
         SNCA.runDFS(ToTN->getBlock(), 0, DescendAndCollect, 0);
 
@@ -1558,48 +1558,46 @@ struct SemiNCAInfo {
   }
 };
 
-template <class DomTreeT>
-void Calculate(DomTreeT &DT) {
-  SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, nullptr);
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void Calculate(DominatorTreeOnView<NodeT, IsPostDom, View> &DT) {
+  SemiNCAInfoOnView<NodeT, IsPostDom, View>::CalculateFromScratch(DT, nullptr);
 }
 
-template <typename DomTreeT>
-void CalculateWithUpdates(DomTreeT &DT,
-                          ArrayRef<typename DomTreeT::UpdateType> Updates) {
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void CalculateWithUpdates(DominatorTreeOnView<NodeT, IsPostDom, View> &DT,
+                          ArrayRef<typename DominatorTreeOnView<NodeT, IsPostDom, View>::UpdateType> Updates) {
   // FIXME: Updated to use the PreViewCFG and behave the same as until now.
   // This behavior is however incorrect; this actually needs the PostViewCFG.
-  GraphDiff<typename DomTreeT::NodePtr, DomTreeT::IsPostDominator> PreViewCFG(
+  GraphDiff<typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr, IsPostDom> PreViewCFG(
       Updates, /*ReverseApplyUpdates=*/true);
-  typename SemiNCAInfo<DomTreeT>::BatchUpdateInfo BUI(PreViewCFG);
-  SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, &BUI);
+  typename SemiNCAInfoOnView<NodeT, IsPostDom, View>::BatchUpdateInfo BUI(PreViewCFG);
+  SemiNCAInfoOnView<NodeT, IsPostDom, View>::CalculateFromScratch(DT, &BUI);
 }
 
-template <class DomTreeT>
-void InsertEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
-                typename DomTreeT::NodePtr To) {
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void InsertEdge(DominatorTreeOnView<NodeT, IsPostDom, View> &DT, typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr From,
+                typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr To) {
   if (DT.isPostDominator()) std::swap(From, To);
-  SemiNCAInfo<DomTreeT>::InsertEdge(DT, nullptr, From, To);
+  SemiNCAInfoOnView<NodeT, IsPostDom, View>::InsertEdge(DT, nullptr, From, To);
 }
 
-template <class DomTreeT>
-void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From,
-                typename DomTreeT::NodePtr To) {
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void DeleteEdge(DominatorTreeOnView<NodeT, IsPostDom, View> &DT, typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr From,
+                typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr To) {
   if (DT.isPostDominator()) std::swap(From, To);
-  SemiNCAInfo<DomTreeT>::DeleteEdge(DT, nullptr, From, To);
+  SemiNCAInfoOnView<NodeT, IsPostDom, View>::DeleteEdge(DT, nullptr, From, To);
 }
 
-template <class DomTreeT>
-void ApplyUpdates(DomTreeT &DT,
-                  GraphDiff<typename DomTreeT::NodePtr,
-                            DomTreeT::IsPostDominator> &PreViewCFG,
-                  GraphDiff<typename DomTreeT::NodePtr,
-                            DomTreeT::IsPostDominator> *PostViewCFG) {
-  SemiNCAInfo<DomTreeT>::ApplyUpdates(DT, PreViewCFG, PostViewCFG);
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+void ApplyUpdates(DominatorTreeOnView<NodeT, IsPostDom, View> &DT,
+                  GraphDiff<typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr, IsPostDom> &PreViewCFG,
+                  GraphDiff<typename DominatorTreeOnView<NodeT, IsPostDom, View>::NodePtr, IsPostDom> *PostViewCFG) {
+  SemiNCAInfoOnView<NodeT, IsPostDom, View>::ApplyUpdates(DT, PreViewCFG, PostViewCFG);
 }
 
-template <class DomTreeT>
-bool Verify(const DomTreeT &DT, typename DomTreeT::VerificationLevel VL) {
-  SemiNCAInfo<DomTreeT> SNCA(nullptr);
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+bool Verify(const DominatorTreeOnView<NodeT, IsPostDom, View> &DT, typename DominatorTreeOnView<NodeT, IsPostDom, View>::VerificationLevel VL) {
+  SemiNCAInfoOnView<NodeT, IsPostDom, View> SNCA(nullptr);
 
   // Simplist check is to compare against a new tree. This will also
   // usefully print the old and new trees, if they are different.
@@ -1611,12 +1609,12 @@ bool Verify(const DomTreeT &DT, typename DomTreeT::VerificationLevel VL) {
       !SNCA.VerifyLevels(DT) || !SNCA.VerifyDFSNumbers(DT))
     return false;
 
-  // Extra checks depending on VerificationLevel. Up to O(N^3).
-  if (VL == DomTreeT::VerificationLevel::Basic ||
-      VL == DomTreeT::VerificationLevel::Full)
+  // Extra checks depending on VerificationLevel. Up to O(N^3).
+  if (VL == DominatorTreeOnView<NodeT, IsPostDom, View>::VerificationLevel::Basic ||
+      VL == DominatorTreeOnView<NodeT, IsPostDom, View>::VerificationLevel::Full)
     if (!SNCA.verifyParentProperty(DT))
       return false;
-  if (VL == DomTreeT::VerificationLevel::Full)
+  if (VL == DominatorTreeOnView<NodeT, IsPostDom, View>::VerificationLevel::Full)
     if (!SNCA.verifySiblingProperty(DT))
       return false;
 
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 951660bbab28..e2df311cb60e 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -20,8 +20,17 @@
 
 namespace llvm {
 
-template <typename T> class DomTreeNodeBase;
+template<typename X>
+using DTIdentityView = X;
+
+template <typename NodeT, template<typename> class View>
+class DomTreeNodeOnView;
+
+template <typename NodeT>
+using DomTreeNodeBase = DomTreeNodeOnView<NodeT, DTIdentityView>;
+
 using DomTreeNode = DomTreeNodeBase<BasicBlock>;
+
 class AAResults;
 class AliasSet;
 class AliasSetTracker;
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 99fa58b8872a..4737c40948f3 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -876,7 +876,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
       // Check number of predecessors, we only care if there's more than one.
       unsigned Count = 0;
       BasicBlock *Pred = nullptr;
-      for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) {
+      for (auto *Pi : GD->template getChildren</*InverseEdge=*/true, DTIdentityView>(BB)) {
         Pred = Pi;
         Count++;
         if (Count == 2)
@@ -970,7 +970,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
     auto *BB = BBPredPair.first;
     const auto &AddedBlockSet = BBPredPair.second.Added;
     auto &PrevBlockSet = BBPredPair.second.Prev;
-    for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BB)) {
+    for (auto *Pi : GD->template getChildren</*InverseEdge=*/true, DTIdentityView>(BB)) {
       if (!AddedBlockSet.count(Pi))
         PrevBlockSet.insert(Pi);
       EdgeCountMap[{Pi, BB}]++;
@@ -1121,7 +1121,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
         for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
           IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
       } else {
-        for (auto *Pi : GD->template getChildren</*InverseEdge=*/true>(BBIDF))
+        for (auto *Pi : GD->template getChildren</*InverseEdge=*/true, DTIdentityView>(BBIDF))
           IDFPhi->addIncoming(GetLastDef(Pi), Pi);
       }
     }
diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp
index c8845d838282..3c8d79644001 100644
--- a/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/llvm/lib/CodeGen/MachineDominators.cpp
@@ -33,8 +33,8 @@ static cl::opt<bool, true> VerifyMachineDomInfoX(
     cl::desc("Verify machine dominator info (time consuming)"));
 
 namespace llvm {
-template class DomTreeNodeBase<MachineBasicBlock>;
-template class DominatorTreeBase<MachineBasicBlock, false>; // DomTreeBase
+template class DomTreeNodeOnView<MachineBasicBlock, DTIdentityView>;
+template class DominatorTreeOnView<MachineBasicBlock, false, DTIdentityView>; // DomTreeBase
 }
 
 char MachineDominatorTree::ID = 0;
diff --git a/llvm/lib/CodeGen/MachinePostDominators.cpp b/llvm/lib/CodeGen/MachinePostDominators.cpp
index fb96d0efa4d4..c13fdbbc1815 100644
--- a/llvm/lib/CodeGen/MachinePostDominators.cpp
+++ b/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -17,7 +17,7 @@
 using namespace llvm;
 
 namespace llvm {
-template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
+template class DominatorTreeOnView<MachineBasicBlock, true, DTIdentityView>; // PostDomTreeBase
 
 extern bool VerifyMachineDomInfo;
 } // namespace llvm
diff --git a/llvm/lib/IR/Dominators.cpp b/llvm/lib/IR/Dominators.cpp
index fbc28c202aec..86326fdea3cc 100644
--- a/llvm/lib/IR/Dominators.cpp
+++ b/llvm/lib/IR/Dominators.cpp
@@ -63,43 +63,59 @@ bool BasicBlockEdge::isSingleEdge() const {
 //
 //===----------------------------------------------------------------------===//
 
-template class llvm::DomTreeNodeBase<BasicBlock>;
-template class llvm::DominatorTreeBase<BasicBlock, false>; // DomTreeBase
-template class llvm::DominatorTreeBase<BasicBlock, true>; // PostDomTreeBase
+
+namespace llvm {
+
+template <typename NodeT, bool IsPostDom, template<typename> class View>
+class DominatorTreeOnView;
+
+template <typename NodeT, bool IsPostDom>
+using DominatorTreeBase = DominatorTreeOnView<NodeT, IsPostDom, DTIdentityView>;
+
+template <class NodeT, template<typename> class View>
+class DomTreeNodeOnView;
+
+template <class NodeT>
+using DomTreeNodeBase = DomTreeNodeOnView<NodeT, DTIdentityView>;
+
+template class DomTreeNodeOnView<BasicBlock, DTIdentityView>;
+template class DominatorTreeOnView<BasicBlock, false, DTIdentityView>; // DomTree
+template class DominatorTreeOnView<BasicBlock, true, DTIdentityView>; // PostDomTree
+} // end namespace
 
 template class llvm::cfg::Update<BasicBlock *>;
 
-template void llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBDomTree>(
+template void llvm::DomTreeBuilder::Calculate<BasicBlock, false, DTIdentityView>(
     DomTreeBuilder::BBDomTree &DT);
 template void
-llvm::DomTreeBuilder::CalculateWithUpdates<DomTreeBuilder::BBDomTree>(
+llvm::DomTreeBuilder::CalculateWithUpdates<BasicBlock, false, DTIdentityView>(
     DomTreeBuilder::BBDomTree &DT, BBUpdates U);
 
-template void llvm::DomTreeBuilder::Calculate<DomTreeBuilder::BBPostDomTree>(
+template void llvm::DomTreeBuilder::Calculate<BasicBlock, true, DTIdentityView>(
     DomTreeBuilder::BBPostDomTree &DT);
 // No CalculateWithUpdates<PostDomTree> instantiation, unless a usecase arises.
 
-template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBDomTree>(
+template void llvm::DomTreeBuilder::InsertEdge<BasicBlock, false, DTIdentityView>(
     DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
-template void llvm::DomTreeBuilder::InsertEdge<DomTreeBuilder::BBPostDomTree>(
+template void llvm::DomTreeBuilder::InsertEdge<BasicBlock, true, DTIdentityView>(
     DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
 
-template void llvm::DomTreeBuilder::DeleteEdge<DomTreeBuilder::BBDomTree>(
+template void llvm::DomTreeBuilder::DeleteEdge<BasicBlock, false, DTIdentityView>(
     DomTreeBuilder::BBDomTree &DT, BasicBlock *From, BasicBlock *To);
-template void llvm::DomTreeBuilder::DeleteEdge<DomTreeBuilder::BBPostDomTree>(
+template void llvm::DomTreeBuilder::DeleteEdge<BasicBlock, true, DTIdentityView>(
     DomTreeBuilder::BBPostDomTree &DT, BasicBlock *From, BasicBlock *To);
 
-template void llvm::DomTreeBuilder::ApplyUpdates<DomTreeBuilder::BBDomTree>(
-    DomTreeBuilder::BBDomTree &DT, DomTreeBuilder::BBDomTreeGraphDiff &,
-    DomTreeBuilder::BBDomTreeGraphDiff *);
-template void llvm::DomTreeBuilder::ApplyUpdates<DomTreeBuilder::BBPostDomTree>(
-    DomTreeBuilder::BBPostDomTree &DT, DomTreeBuilder::BBPostDomTreeGraphDiff &,
-    DomTreeBuilder::BBPostDomTreeGraphDiff *);
+template void
+llvm::DomTreeBuilder::ApplyUpdates<BasicBlock, false, DTIdentityView>(
+    BBDomTree &DT, BBDomTreeGraphDiff &, BBDomTreeGraphDiff *);
+template void
+llvm::DomTreeBuilder::ApplyUpdates<BasicBlock, true, DTIdentityView>(
+    BBPostDomTree &DT, BBPostDomTreeGraphDiff &, BBPostDomTreeGraphDiff *);
 
-template bool llvm::DomTreeBuilder::Verify<DomTreeBuilder::BBDomTree>(
+template bool llvm::DomTreeBuilder::Verify<BasicBlock, false, DTIdentityView>(
     const DomTreeBuilder::BBDomTree &DT,
     DomTreeBuilder::BBDomTree::VerificationLevel VL);
-template bool llvm::DomTreeBuilder::Verify<DomTreeBuilder::BBPostDomTree>(
+template bool llvm::DomTreeBuilder::Verify<BasicBlock, true, DTIdentityView>(
     const DomTreeBuilder::BBPostDomTree &DT,
     DomTreeBuilder::BBPostDomTree::VerificationLevel VL);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b26399e0ae58..709ddd291505 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1018,7 +1018,7 @@ void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
   O << " = WIDEN-CANONICAL-INDUCTION";
 }
 
-template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
+template void DomTreeBuilder::Calculate<VPBlockBase, false, DTIdentityView>(VPDominatorTree &DT);
 
 void VPValue::replaceAllUsesWith(VPValue *New) {
   for (unsigned J = 0; J < getNumUsers();) {
diff --git a/llvm/unittests/IR/DominatorTreeBatchUpdatesTest.cpp b/llvm/unittests/IR/DominatorTreeBatchUpdatesTest.cpp
index a2d5805a9874..5e1a8f28527b 100644
--- a/llvm/unittests/IR/DominatorTreeBatchUpdatesTest.cpp
+++ b/llvm/unittests/IR/DominatorTreeBatchUpdatesTest.cpp
@@ -26,8 +26,8 @@ using DomUpdate = DominatorTree::UpdateType;
 static_assert(
     std::is_same<DomUpdate, PostDominatorTree::UpdateType>::value,
     "Trees differing only in IsPostDom should have the same update types");
-using DomSNCA = DomTreeBuilder::SemiNCAInfo<DomTreeBuilder::BBDomTree>;
-using PostDomSNCA = DomTreeBuilder::SemiNCAInfo<DomTreeBuilder::BBPostDomTree>;
+using DomSNCA = DomTreeBuilder::SemiNCAInfoOnView<BasicBlock, false, DTIdentityView>;
+using PostDomSNCA = DomTreeBuilder::SemiNCAInfoOnView<BasicBlock, true, DTIdentityView>;
 const auto Insert = DominatorTree::Insert;
 const auto Delete = DominatorTree::Delete;
 

From 7433c47414a360cf27e3bbbb8574d3e718c85ddd Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Thu, 27 May 2021 17:27:49 +0200
Subject: [PATCH 239/244] Enable PDT on GraphTraits using mapped_iterator

---
 llvm/include/llvm/Support/CFGDiff.h                    | 5 +++--
 llvm/include/llvm/Support/GenericDomTreeConstruction.h | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Support/CFGDiff.h b/llvm/include/llvm/Support/CFGDiff.h
index 07cf242d67a8..b0a50690d228 100644
--- a/llvm/include/llvm/Support/CFGDiff.h
+++ b/llvm/include/llvm/Support/CFGDiff.h
@@ -135,8 +135,9 @@ template <typename NodePtr, bool InverseGraph = false> class GraphDiff {
   template <bool InverseEdge, template <typename> class View> VectRet getChildren(NodePtr N) const {
     using DirectedNodeT =
         std::conditional_t<InverseEdge, Inverse<View<NodePtr>>, View<NodePtr>>;
-    auto R = children<DirectedNodeT>(N);
-    VectRet Res = VectRet(detail::reverse_if<!InverseEdge>(R));
+    VectRet Res = VectRet(children<DirectedNodeT>(N));
+    if (not InverseEdge)
+      std::reverse(Res.begin(), Res.end());
 
     // Remove nullptr children for clang.
     llvm::erase_value(Res, nullptr);
diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 6497fc3f75bf..8f20da1991ec 100644
--- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -115,8 +115,10 @@ struct SemiNCAInfoOnView {
   static SmallVector<NodePtr, 8> getChildren(NodePtr N) {
     using DirectedNodeT =
         std::conditional_t<Inversed, Inverse<View<NodePtr>>, View<NodePtr>>;
-    auto R = children<DirectedNodeT>(N);
-    SmallVector<NodePtr, 8> Res(detail::reverse_if<!Inversed>(R));
+    using VectRet = SmallVector<NodePtr, 8>;
+    VectRet Res = VectRet(children<DirectedNodeT>(N));
+    if (not Inversed)
+      std::reverse(Res.begin(), Res.end());
 
     // Remove nullptr children for clang.
     llvm::erase_value(Res, nullptr);

From 404323513a7270ddc2e2077fba2018bd743a117c Mon Sep 17 00:00:00 2001
From: Pietro Fezzardi <pietro@rev.ng>
Date: Thu, 10 Jun 2021 16:59:56 +0200
Subject: [PATCH 240/244] SROA: speculate load across transitive PHI and
 SelectInst

---
 llvm/lib/Transforms/Scalar/SROA.cpp           |  83 +++++++--
 .../SROA/phi-and-select-aggressive.ll         | 164 ++++++++++++++++++
 llvm/test/Transforms/SROA/phi-and-select.ll   |  42 +++++
 3 files changed, 272 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/Transforms/SROA/phi-and-select-aggressive.ll

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index af510f1a84bf..319d46414154 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -115,6 +115,12 @@ STATISTIC(NumVectorized, "Number of vectorized aggregates");
 static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
                                         cl::Hidden);
 
+/// Hidden option to experiment with aggressive speculation on transitive chains
+/// of PHIs and SelectInsts.
+static cl::opt<bool> SROAAggressivePHISelects("sroa-aggressive-phis-selects",
+                                              cl::init(false),
+                                              cl::Hidden);
+
 namespace {
 
 /// A custom IRBuilder inserter which prefixes all names, but only in
@@ -1189,7 +1195,7 @@ findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
 ///
 /// FIXME: This should be hoisted into a generic utility, likely in
 /// Transforms/Util/Local.h
-static bool isSafePHIToSpeculate(PHINode &PN) {
+static bool isSafePHIToSpeculate(PHINode &PN, const SmallPtrSet<Instruction *, 8> &GoodPHIOrSelects) {
   const DataLayout &DL = PN.getModule()->getDataLayout();
 
   // For now, we can only do this promotion if the load is in the same block
@@ -1201,7 +1207,12 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
   uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
   APInt MaxSize(APWidth, 0);
   bool HaveLoad = false;
-  for (User *U : PN.users()) {
+  for (Use &TheUse : PN.uses()) {
+    User *U = TheUse.getUser();
+    if (GoodPHIOrSelects.count(dyn_cast<Instruction>(U)))
+      if (not isa<SelectInst>(U) or TheUse.getOperandNo() != 0)
+        continue;
+
     LoadInst *LI = dyn_cast<LoadInst>(U);
     if (!LI || !LI->isSimple())
       return false;
@@ -1324,12 +1335,17 @@ static void speculatePHINodeLoads(PHINode &PN) {
 ///
 /// We can do this to a select if its only uses are loads and if the operand
 /// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI) {
+static bool isSafeSelectToSpeculate(SelectInst &SI, const SmallPtrSet<Instruction *, 8> &GoodPHIOrSelects) {
   Value *TValue = SI.getTrueValue();
   Value *FValue = SI.getFalseValue();
   const DataLayout &DL = SI.getModule()->getDataLayout();
 
-  for (User *U : SI.users()) {
+  for (Use &TheUse : SI.uses()) {
+    User *U = TheUse.getUser();
+    if (GoodPHIOrSelects.count(dyn_cast<Instruction>(U)))
+      if (not isa<SelectInst>(U) or TheUse.getOperandNo() != 0)
+        continue;
+
     LoadInst *LI = dyn_cast<LoadInst>(U);
     if (!LI || !LI->isSimple())
       return false;
@@ -4346,21 +4362,54 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
 
   // Now that we've processed all the slices in the new partition, check if any
   // PHIs or Selects would block promotion.
-  for (PHINode *PHI : PHIUsers)
-    if (!isSafePHIToSpeculate(*PHI)) {
-      Promotable = false;
-      PHIUsers.clear();
-      SelectUsers.clear();
-      break;
-    }
+  if (SROAAggressivePHISelects.getNumOccurrences()
+      and SROAAggressivePHISelects.getValue()) {
 
-  for (SelectInst *Sel : SelectUsers)
-    if (!isSafeSelectToSpeculate(*Sel)) {
+    SmallPtrSet<Instruction *, 8> GoodPHIOrSelects;
+    bool FoundPromotable = true;
+    do {
+      FoundPromotable = false;
+      for (PHINode *PHI : PHIUsers) {
+        if (GoodPHIOrSelects.count(PHI))
+            continue;
+        if (isSafePHIToSpeculate(*PHI, GoodPHIOrSelects)) {
+          FoundPromotable = true;
+          GoodPHIOrSelects.insert(PHI);
+        }
+      }
+
+      for (SelectInst *Sel : SelectUsers) {
+        if (GoodPHIOrSelects.count(Sel))
+            continue;
+        if (isSafeSelectToSpeculate(*Sel, GoodPHIOrSelects)) {
+          FoundPromotable = true;
+          GoodPHIOrSelects.insert(Sel);
+        }
+      }
+    } while (FoundPromotable);
+
+    if (GoodPHIOrSelects.size() < (SelectUsers.size() + PHIUsers.size()))
       Promotable = false;
-      PHIUsers.clear();
-      SelectUsers.clear();
-      break;
-    }
+
+  } else {
+
+    for (PHINode *PHI : PHIUsers)
+      if (!isSafePHIToSpeculate(*PHI, {})) {
+        Promotable = false;
+        PHIUsers.clear();
+        SelectUsers.clear();
+        break;
+      }
+
+    for (SelectInst *Sel : SelectUsers)
+      if (!isSafeSelectToSpeculate(*Sel, {})) {
+        Promotable = false;
+        PHIUsers.clear();
+        SelectUsers.clear();
+        break;
+      }
+
+  }
 
   if (Promotable) {
     for (Use *U : AS.getDeadUsesIfPromotable()) {
diff --git a/llvm/test/Transforms/SROA/phi-and-select-aggressive.ll b/llvm/test/Transforms/SROA/phi-and-select-aggressive.ll
new file mode 100644
index 000000000000..f605c4cee199
--- /dev/null
+++ b/llvm/test/Transforms/SROA/phi-and-select-aggressive.ll
@@ -0,0 +1,164 @@
+; RUN: opt < %s -sroa --sroa-aggressive-phis-selects -dce -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+; These tests have been produced with llvm-12 from the following C code with the
+; commands:
+;   clang -O0 file.c -S -emit-llvm -o file-O0.ll -Xclang -disable-O0-optnone
+;   opt file-O0.ll -S -o test.ll -instcombine -simplifycfg -dce -dse -sroa -sroa -sroa
+;
+; After this, all debug info and all metadata have been removed.
+;
+; The three sroa invocations at the end are meant to rule out everything that
+; regular old sroa is already able to optimize away. What's left should only be
+; optimizable by sroa with the new --sroa-aggressive-phis-selects.
+;
+; The selectphichain test has also been included in phi-and-select.ll, to make
+; sure that regular sroa is not able to optimize it.
+;
+; If the regular sroa ever becomes powerful enough to handle the cases that
+; previously required the aggressive version, the aggressive version can likely
+; be dropped altogether.
+;
+;
+; #include <alloca.h>
+; #include <stdio.h>
+;
+; extern int initx(void);
+; extern int inity(void);
+; extern void leak(int* leaked);
+; extern int getcondition(void);
+;
+; int selectphichain()
+; {
+;   int* x = alloca(sizeof(int));
+;   int* y = alloca(sizeof(int));
+;   *x = initx();
+;   *y = inity();
+;   int* p = NULL;
+;   if (getcondition() > 3) {
+;     *x = 100;
+;     if (getcondition() > 8) {
+;       p = x;
+;     } else {
+;       p = y;
+;     }
+;     *x = 200;
+;   } else {
+;     p = x;
+;   }
+;   return *p;
+; }
+;
+; int leaking()
+; {
+;   int* x = alloca(sizeof(int));
+;   int* y = alloca(sizeof(int));
+;   *x = initx();
+;   leak(x);
+;   *y = inity();
+;   leak(y);
+;   int* p = NULL;
+;   if (getcondition() > 3) {
+;     *x = 100;
+;     if (getcondition() > 8) {
+;       p = x;
+;     } else {
+;       p = y;
+;     }
+;     *x = 200;
+;   } else {
+;     p = x;
+;   }
+;   return *p;
+; }
+
+declare i32 @initx()
+
+declare i32 @inity()
+
+declare i32 @getcondition()
+
+declare void @leak(i32*)
+
+define i32 @selectphichain() {
+; CHECK-LABEL: @selectphichain(
+  %1 = alloca i32, align 16
+; The first alloca could be optimized away in principle, but that would require
+; running sroa in fixed-point fashion. This would require major intrusive
+; changes to the SROA pass, which we want to avoid at the moment.
+; Another option would be to execute the sroa pass with
+; --sroa-aggressive-phis-selects twice, but at the moment we are not doing this.
+; The fact that one of the 2 allocas is optimized away with
+; --sroa-aggressive-phis-selects is already enough to prove that we do something
+; strictly more aggressive than regular sroa. This is already enough for our
+; purposes for now. If we ever find a way to make this more aggressive (or we
+; end up needing it to be) we might want to redesign this test.
+; CHECK: %1 = alloca i32, align 16
+; CHECK-NOT: alloca
+  %2 = alloca i32, align 16
+  %3 = call i32 @initx()
+  store i32 %3, i32* %1, align 16
+; CHECK: %2 = call i32 @initx()
+; CHECK: %3 = call i32 @inity()
+  %4 = call i32 @inity()
+  store i32 %4, i32* %2, align 16
+; CHECK: store i32 %3, i32* %1, align 16
+  %5 = call i32 @getcondition()
+  %6 = icmp sgt i32 %5, 3
+  br i1 %6, label %7, label %10
+
+7:                                                ; preds = %0
+  %8 = call i32 @getcondition()
+  %9 = icmp sgt i32 %8, 8
+  %s = select i1 %9, i32* %1, i32* %2
+  br label %11
+
+10:                                               ; preds = %0
+  br label %11
+
+11:                                               ; preds = %10, %7
+  %thephi = phi i32* [ %s, %7 ], [ %1, %10 ]
+  %12 = load i32, i32* %thephi, align 4
+  ret i32 %12
+; CHECK: ret i32 %thephi.sroa.speculated
+}
+
+define i32 @leaking() {
+; CHECK-LABEL: @leaking(
+  %1 = alloca i32, align 16
+; CHECK: %1 = alloca i32, align 16
+  %2 = alloca i32, align 16
+; CHECK: %2 = alloca i32, align 16
+  %3 = call i32 @initx()
+; CHECK: %3 = call i32 @initx()
+  store i32 %3, i32* %1, align 16
+; CHECK: store i32 %3, i32* %1, align 16
+  call void @leak(i32 *%1)
+; CHECK: call void @leak(i32* %1)
+  %4 = call i32 @inity()
+; CHECK: %4 = call i32 @inity()
+  store i32 %4, i32* %2, align 16
+; CHECK: store i32 %4, i32* %2, align 16
+  call void @leak(i32 *%2)
+; CHECK: call void @leak(i32* %2)
+  %5 = call i32 @getcondition()
+  %6 = icmp sgt i32 %5, 3
+  br i1 %6, label %7, label %10
+
+7:                                                ; preds = %0
+  %8 = call i32 @getcondition()
+  %9 = icmp sgt i32 %8, 8
+  %s = select i1 %9, i32* %1, i32* %2
+; CHECK: %s = select i1 %9, i32* %1, i32* %2
+  br label %11
+
+10:                                               ; preds = %0
+  br label %11
+
+11:                                               ; preds = %10, %7
+  %thephi = phi i32* [ %s, %7 ], [ %1, %10 ]
+; CHECK: %thephi = phi i32* [ %s, %7 ], [ %1, %10 ]
+  %12 = load i32, i32* %thephi, align 4
+  ret i32 %12
+; CHECK: ret i32 %12
+}
diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
index 8f04e3df13f9..157861fd9573 100644
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -694,3 +694,45 @@ define i8 @volatile_select(i8* %p, i1 %b) {
   %v2 = load i8, i8* %px
   ret i8 %v2
 }
+
+declare i32 @initx()
+
+declare i32 @inity()
+
+declare i32 @getcondition()
+
+define i32 @selectphichain() {
+; CHECK-LABEL: @selectphichain(
+  %1 = alloca i32, align 16
+; CHECK: %1 = alloca i32, align 16
+  %2 = alloca i32, align 16
+; CHECK:  %2 = alloca i32, align 16
+  %3 = call i32 @initx()
+; CHECK: %3 = call i32 @initx()
+  store i32 %3, i32* %1, align 16
+; CHECK: store i32 %3, i32* %1, align 16
+  %4 = call i32 @inity()
+; CHECK: %4 = call i32 @inity()
+  store i32 %4, i32* %2, align 16
+; CHECK: store i32 %4, i32* %2, align 16
+  %5 = call i32 @getcondition()
+  %6 = icmp sgt i32 %5, 3
+  br i1 %6, label %7, label %10
+
+7:                                                ; preds = %0
+  %8 = call i32 @getcondition()
+  %9 = icmp sgt i32 %8, 8
+  %s = select i1 %9, i32* %1, i32* %2
+; CHECK: %s = select i1 %9, i32* %1, i32* %2
+  br label %11
+
+10:                                               ; preds = %0
+  br label %11
+
+11:                                               ; preds = %10, %7
+  %thephi = phi i32* [ %s, %7 ], [ %1, %10 ]
+; CHECK: %thephi = phi i32* [ %s, %7 ], [ %1, %10 ]
+  %12 = load i32, i32* %thephi, align 4
+; CHECK: %12 = load i32, i32* %thephi, align 4
+  ret i32 %12
+}

From 79a55d0b954c97998708f8a22e7f62781f7b7e0b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <antoniofrighetto@rev.ng>
Date: Fri, 17 Sep 2021 15:08:34 +0200
Subject: [PATCH 241/244] Bitcast wrapped in a call obscures function
 attributes, pessimizing MemorySSA

An incomplete check may lead MemorySSA to be too conservative in its results. Specifically,
when dealing with a call such as `call i32 bitcast (i1 (i1)* @test to i32 (i32)*)(i32 %1)`,
where the function `test` is declared with the `readonly` attribute, the bitcast is not looked
through, obscuring the function's attributes. Hence, some methods of CallBase (e.g., `doesNotReadMemory`)
could provide incomplete results. This is addressed by looking through the bitcast in `hasFnAttrOnCalledFunction`.
---
 llvm/lib/IR/Instructions.cpp                  | 16 +++-
 llvm/test/Analysis/MemorySSA/call-bitcast.ll  | 14 ++++
 .../IPConstantProp/arg-count-mismatch.ll      | 31 ++++---
 .../IPConstantProp/arg-type-mismatch.ll       | 16 ++--
 llvm/test/Transforms/Attributor/liveness.ll   | 80 ++++++++++++-------
 5 files changed, 114 insertions(+), 43 deletions(-)
 create mode 100644 llvm/test/Analysis/MemorySSA/call-bitcast.ll

diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index d6b4a4f5030f..73d1e0aa622b 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -334,14 +334,26 @@ bool CallBase::paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
 }
 
 bool CallBase::hasFnAttrOnCalledFunction(Attribute::AttrKind Kind) const {
-  if (const Function *F = getCalledFunction())
+  Value *V = getCalledOperand();
+  if (auto *CE = dyn_cast<ConstantExpr>(V))
+    if (CE->getOpcode() == BitCast)
+      V = CE->getOperand(0);
+
+  if (auto *F = dyn_cast<Function>(V))
     return F->getAttributes().hasFnAttribute(Kind);
+
   return false;
 }
 
 bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const {
-  if (const Function *F = getCalledFunction())
+  Value *V = getCalledOperand();
+  if (auto *CE = dyn_cast<ConstantExpr>(V))
+    if (CE->getOpcode() == BitCast)
+      V = CE->getOperand(0);
+
+  if (auto *F = dyn_cast<Function>(V))
     return F->getAttributes().hasFnAttribute(Kind);
+
   return false;
 }
 
diff --git a/llvm/test/Analysis/MemorySSA/call-bitcast.ll b/llvm/test/Analysis/MemorySSA/call-bitcast.ll
new file mode 100644
index 000000000000..c4bfdbee77ad
--- /dev/null
+++ b/llvm/test/Analysis/MemorySSA/call-bitcast.ll
@@ -0,0 +1,14 @@
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Ensures that MemorySSA leverages the ground truth of the function being called when wrapped in a bitcast.
+
+declare i1 @opaque_true(i1) nounwind readonly
+
+define i1 @foo(i32* %ptr, i1 %cond) {
+  %cond_wide = zext i1 %cond to i32
+; CHECK: MemoryUse(liveOnEntry) MayAlias
+; CHECK-NEXT: call i32 bitcast
+  %cond_hidden_wide = call i32 bitcast (i1 (i1)* @opaque_true to i32 (i32)*)(i32 %cond_wide)
+  %cond_hidden = trunc i32 %cond_hidden_wide to i1
+  ret i1 %cond_hidden
+}
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
index 3bdfbbb36eb1..22d7ce63a658 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
@@ -70,10 +70,16 @@ define internal i16 @bar(i16 %p1, i16 %p2) {
 }
 
 define dso_local i16 @foo2(i16 %a) {
-; CHECK-LABEL: define {{[^@]+}}@foo2
-; CHECK-SAME: (i16 [[A:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar2 to i16 (i16)*)(i16 [[A]])
-; CHECK-NEXT:    ret i16 [[CALL]]
+; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@foo2
+; NOT_CGSCC_NPM-SAME: (i16 [[A:%.*]]) {
+; NOT_CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar2 to i16 (i16)*)(i16 [[A]])
+; NOT_CGSCC_NPM-NEXT:    ret i16 [[CALL]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@foo2
+; IS__CGSCC_NPM-SAME: (i16 [[A:%.*]]) [[ATTR1:#.*]] {
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar2 to i16 (i16)*)(i16 [[A]])
+; IS__CGSCC_NPM-NEXT:    ret i16 [[CALL]]
 ;
   %call = call i16 bitcast (i16 (i16, i16) * @bar2 to i16 (i16) *)(i16 %a)
   ret i16 %call
@@ -103,11 +109,18 @@ define internal i16 @bar2(i16 %p1, i16 %p2) {
 ; been provided),
 
 define dso_local i16 @vararg_tests(i16 %a) {
-; CHECK-LABEL: define {{[^@]+}}@vararg_tests
-; CHECK-SAME: (i16 [[A:%.*]]) {
-; CHECK-NEXT:    [[CALL2:%.*]] = call i16 bitcast (i16 (i16, i16, ...)* @vararg_no_prop to i16 (i16)*)(i16 noundef 7)
-; CHECK-NEXT:    [[ADD:%.*]] = add i16 7, [[CALL2]]
-; CHECK-NEXT:    ret i16 [[ADD]]
+; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@vararg_tests
+; NOT_CGSCC_NPM-SAME: (i16 [[A:%.*]]) {
+; NOT_CGSCC_NPM-NEXT:    [[CALL2:%.*]] = call i16 bitcast (i16 (i16, i16, ...)* @vararg_no_prop to i16 (i16)*)(i16 noundef 7)
+; NOT_CGSCC_NPM-NEXT:    [[ADD:%.*]] = add i16 7, [[CALL2]]
+; NOT_CGSCC_NPM-NEXT:    ret i16 [[ADD]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@vararg_tests
+; IS__CGSCC_NPM-SAME: (i16 [[A:%.*]]) [[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[CALL2:%.*]] = call i16 bitcast (i16 (i16, i16, ...)* @vararg_no_prop to i16 (i16)*)(i16 noundef 7)
+; IS__CGSCC_NPM-NEXT:    [[ADD:%.*]] = add i16 7, [[CALL2]]
+; IS__CGSCC_NPM-NEXT:    ret i16 [[ADD]]
 ;
   %call1 = call i16 (i16, ...) @vararg_prop(i16 7, i16 8, i16 %a)
   %call2 = call i16 bitcast (i16 (i16, i16, ...) * @vararg_no_prop to i16 (i16) *) (i16 7)
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll
index bdbfd20c0518..61b78d95c93a 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll
@@ -8,10 +8,16 @@
 ; argument type between the caller and callee.
 
 define dso_local i16 @foo(i16 %a) {
-; CHECK-LABEL: define {{[^@]+}}@foo
-; CHECK-SAME: (i16 [[A:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7)
-; CHECK-NEXT:    ret i16 [[CALL]]
+; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@foo
+; NOT_CGSCC_NPM-SAME: (i16 [[A:%.*]]) {
+; NOT_CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7)
+; NOT_CGSCC_NPM-NEXT:    ret i16 [[CALL]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@foo
+; IS__CGSCC_NPM-SAME: (i16 [[A:%.*]]) [[ATTR0:#.*]] {
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7)
+; IS__CGSCC_NPM-NEXT:    ret i16 [[CALL]]
 ;
   %call = call i16 bitcast (i16 (i16, i16) * @bar to i16 (i16, i32) *)(i16 %a, i32 7)
   ret i16 %call
@@ -25,7 +31,7 @@ define internal i16 @bar(i16 %p1, i16 %p2) {
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@bar
-; IS__CGSCC____-SAME: (i16 [[P1:%.*]], i16 returned [[P2:%.*]]) [[ATTR0:#.*]] {
+; IS__CGSCC____-SAME: (i16 [[P1:%.*]], i16 returned [[P2:%.*]]) [[ATTR1:#.*]] {
 ; IS__CGSCC____-NEXT:    ret i16 [[P2]]
 ;
   ret i16 %p2
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index 50c093af962d..5a2f968eadad 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2425,33 +2425,59 @@ indirectgoto:                                     ; preds = %lab0, %entry
 @e = global %struct.a* null
 
 define i32 @main() {
-; CHECK-LABEL: define {{[^@]+}}@main() {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[F:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    br label [[FOR_COND_0:%.*]]
-; CHECK:       for.cond.0:
-; CHECK-NEXT:    [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ]
-; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100
-; CHECK-NEXT:    br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]]
-; CHECK:       for.body.0:
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[G_0]], 1
-; CHECK-NEXT:    br label [[FOR_COND_0]]
-; CHECK:       for.end.0:
-; CHECK-NEXT:    [[CALL:%.*]] = call i8* @malloc(i64 noundef 8)
-; CHECK-NEXT:    store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8
-; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a**
-; CHECK-NEXT:    store %struct.a* null, %struct.a** [[B]], align 8
-; CHECK-NEXT:    br label [[FOR_COND_1:%.*]]
-; CHECK:       for.cond.1:
-; CHECK-NEXT:    [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ]
-; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100
-; CHECK-NEXT:    br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]]
-; CHECK:       for.body.1:
-; CHECK-NEXT:    [[CALL4:%.*]] = call i32 (i32*, ...) bitcast (i32 (i32)* @h to i32 (i32*, ...)*)(i32* nonnull [[F]])
-; CHECK-NEXT:    [[INC6]] = add nuw nsw i32 [[G_1]], 1
-; CHECK-NEXT:    br label [[FOR_COND_1]]
-; CHECK:       for.end.1:
-; CHECK-NEXT:    ret i32 0
+; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@main() {
+; NOT_CGSCC_NPM-NEXT:  entry:
+; NOT_CGSCC_NPM-NEXT:    [[F:%.*]] = alloca i32, align 4
+; NOT_CGSCC_NPM-NEXT:    br label [[FOR_COND_0:%.*]]
+; NOT_CGSCC_NPM:       for.cond.0:
+; NOT_CGSCC_NPM-NEXT:    [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ]
+; NOT_CGSCC_NPM-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100
+; NOT_CGSCC_NPM-NEXT:    br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]]
+; NOT_CGSCC_NPM:       for.body.0:
+; NOT_CGSCC_NPM-NEXT:    [[INC]] = add nuw nsw i32 [[G_0]], 1
+; NOT_CGSCC_NPM-NEXT:    br label [[FOR_COND_0]]
+; NOT_CGSCC_NPM:       for.end.0:
+; NOT_CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i8* @malloc(i64 noundef 8)
+; NOT_CGSCC_NPM-NEXT:    store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8
+; NOT_CGSCC_NPM-NEXT:    [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a**
+; NOT_CGSCC_NPM-NEXT:    store %struct.a* null, %struct.a** [[B]], align 8
+; NOT_CGSCC_NPM-NEXT:    br label [[FOR_COND_1:%.*]]
+; NOT_CGSCC_NPM:       for.cond.1:
+; NOT_CGSCC_NPM-NEXT:    [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ]
+; NOT_CGSCC_NPM-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100
+; NOT_CGSCC_NPM-NEXT:    br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]]
+; NOT_CGSCC_NPM:       for.body.1:
+; NOT_CGSCC_NPM-NEXT:    [[CALL4:%.*]] = call i32 (i32*, ...) bitcast (i32 (i32)* @h to i32 (i32*, ...)*)(i32* nonnull [[F]])
+; NOT_CGSCC_NPM-NEXT:    [[INC6]] = add nuw nsw i32 [[G_1]], 1
+; NOT_CGSCC_NPM-NEXT:    br label [[FOR_COND_1]]
+; NOT_CGSCC_NPM:       for.end.1:
+; NOT_CGSCC_NPM-NEXT:    ret i32 0
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@main() {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    br label [[FOR_COND_0:%.*]]
+; IS__CGSCC____:       for.cond.0:
+; IS__CGSCC____-NEXT:    [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ]
+; IS__CGSCC____-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100
+; IS__CGSCC____-NEXT:    br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]]
+; IS__CGSCC____:       for.body.0:
+; IS__CGSCC____-NEXT:    [[INC]] = add nuw nsw i32 [[G_0]], 1
+; IS__CGSCC____-NEXT:    br label [[FOR_COND_0]]
+; IS__CGSCC____:       for.end.0:
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i8* @malloc(i64 noundef 8)
+; IS__CGSCC____-NEXT:    store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8
+; IS__CGSCC____-NEXT:    [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a**
+; IS__CGSCC____-NEXT:    store %struct.a* null, %struct.a** [[B]], align 8
+; IS__CGSCC____-NEXT:    br label [[FOR_COND_1:%.*]]
+; IS__CGSCC____:       for.cond.1:
+; IS__CGSCC____-NEXT:    [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ]
+; IS__CGSCC____-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100
+; IS__CGSCC____-NEXT:    br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]]
+; IS__CGSCC____:       for.body.1:
+; IS__CGSCC____-NEXT:    [[INC6]] = add nuw nsw i32 [[G_1]], 1
+; IS__CGSCC____-NEXT:    br label [[FOR_COND_1]]
+; IS__CGSCC____:       for.end.1:
+; IS__CGSCC____-NEXT:    ret i32 0
 ;
 entry:
   %f = alloca i32

From 1b176508c5f079752cc7aded00d87380a1d53e3d Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Fri, 12 Nov 2021 15:48:31 -0800
Subject: [PATCH 242/244] [Cloning] Clone metadata on function declarations

Previously we missed cloning metadata on function declarations because
we don't call CloneFunctionInto() on declarations in CloneModule().

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D113812
---
 llvm/lib/Transforms/Utils/CloneModule.cpp       | 12 ++++++++++--
 llvm/unittests/Transforms/Utils/CloningTest.cpp | 15 ++++++++++++++-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp
index a6327bbf21bc..75605eeed889 100644
--- a/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -142,10 +142,18 @@ std::unique_ptr<Module> llvm::CloneModule(
   // Similarly, copy over function bodies now...
   //
   for (const Function &I : M) {
-    if (I.isDeclaration())
+    Function *F = cast<Function>(VMap[&I]);
+
+    if (I.isDeclaration()) {
+      // Copy over metadata for declarations since we're not doing it below in
+      // CloneFunctionInto().
+      SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+      I.getAllMetadata(MDs);
+      for (auto MD : MDs)
+        F->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
       continue;
+    }
 
-    Function *F = cast<Function>(VMap[&I]);
     if (!ShouldCloneDefinition(&I)) {
       // Skip after setting the correct linkage for an external reference.
       F->setLinkage(GlobalValue::ExternalLinkage);
diff --git a/llvm/unittests/Transforms/Utils/CloningTest.cpp b/llvm/unittests/Transforms/Utils/CloningTest.cpp
index 016e772c2257..e0c10b52f328 100644
--- a/llvm/unittests/Transforms/Utils/CloningTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CloningTest.cpp
@@ -882,6 +882,10 @@ class CloneModule : public ::testing::Test {
     IBuilder.SetInsertPoint(Entry);
     IBuilder.CreateRetVoid();
 
+    auto *G =
+        Function::Create(FuncType, GlobalValue::ExternalLinkage, "g", OldM);
+    G->addMetadata(LLVMContext::MD_type, *MDNode::get(C, {}));
+
     // Finalize the debug info
     DBuilder.finalize();
   }
@@ -894,7 +898,11 @@ class CloneModule : public ::testing::Test {
 };
 
 TEST_F(CloneModule, Verify) {
-  EXPECT_FALSE(verifyModule(*NewM));
+  // Confirm the old module is (still) valid.
+  EXPECT_FALSE(verifyModule(*OldM, &errs()));
+
+  // Check the new module.
+  EXPECT_FALSE(verifyModule(*NewM, &errs()));
 }
 
 TEST_F(CloneModule, OldModuleUnchanged) {
@@ -912,6 +920,11 @@ TEST_F(CloneModule, Subprogram) {
   EXPECT_EQ(SP->getLine(), (unsigned)4);
 }
 
+TEST_F(CloneModule, FunctionDeclarationMetadata) {
+  Function *NewF = NewM->getFunction("g");
+  EXPECT_NE(nullptr, NewF->getMetadata(LLVMContext::MD_type));
+}
+
 TEST_F(CloneModule, GlobalMetadata) {
   GlobalVariable *NewGV = NewM->getGlobalVariable("gv");
   EXPECT_NE(nullptr, NewGV->getMetadata(LLVMContext::MD_type));

From 349dce922130162c879381d79ef2f5a0628dd37e Mon Sep 17 00:00:00 2001
From: Filippo Cremonese <filippocremonese@rev.ng>
Date: Mon, 7 Mar 2022 17:43:01 +0100
Subject: [PATCH 243/244] Quote ambiguous boolean-like strings

The YAML v1.1 spec interprets more values as booleans than the YAML v1.2
spec. This commit quotes the ambiguous booleans (e.g. "off"). YAML 1.2
parsers will still correctly parse the quoted values, while YAML 1.1
parsers will be able to parse them as strings and not as booleans.

Ref YAML 1.1 spec: https://yaml.org/type/bool.html
---
 llvm/include/llvm/Support/YAMLTraits.h        |  7 +++++-
 .../MIR/X86/variable-sized-stack-objects.mir  |  4 ++--
 .../test/DebugInfo/COFF/global-type-hashes.ll |  2 +-
 .../ObjectYAML/MachO/DWARF-BigEndian.yaml     |  4 ++--
 .../ObjectYAML/MachO/DWARF-LittleEndian.yaml  |  4 ++--
 llvm/test/YAMLParser/bool.test                | 22 +++++++++++++++++--
 llvm/test/tools/llvm-size/common.test         |  2 +-
 7 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 3c8f913a9907..eb9a99c23a89 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -641,7 +641,12 @@ inline bool isNull(StringRef S) {
 inline bool isBool(StringRef S) {
   // FIXME: using parseBool is causing multiple tests to fail.
   return S.equals("true") || S.equals("True") || S.equals("TRUE") ||
-         S.equals("false") || S.equals("False") || S.equals("FALSE");
+         S.equals("yes") || S.equals("Yes") || S.equals ("YES") ||
+         S.equals("on") || S.equals("On") || S.equals ("ON") ||
+         S.equals("false") || S.equals("False") || S.equals("FALSE") ||
+         S.equals("no") || S.equals("No") || S.equals("NO") ||
+         S.equals("off") || S.equals("Off") || S.equals("OFF") ||
+         S.equals("y") || S.equals("Y") || S.equals("n") || S.equals("N");
 }
 
 // 5.1. Character Set
diff --git a/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir b/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
index e251ddac5f3b..5bcc7e5cb4d7 100644
--- a/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
+++ b/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
@@ -30,11 +30,11 @@ frameInfo:
 # CHECK-NEXT: - { id: 1, name: '', type: default, offset: -32, size: 8, alignment: 8,
 # CHECK-NEXT:  stack-id: default, callee-saved-register: '', callee-saved-restored: true,
 # CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1,
+# CHECK-NEXT: - { id: 2, name: 'y', type: variable-sized, offset: -32, alignment: 1,
 stack:
   - { id: 0, offset: -20, size: 4, alignment: 4 }
   - { id: 1, offset: -32, size: 8, alignment: 8 }
-  - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1 }
+  - { id: 2, name: 'y', type: variable-sized, offset: -32, alignment: 1 }
 body: |
   bb.0.entry:
     MOV32mr $rsp, 1, _, -4, _, $edi
diff --git a/llvm/test/DebugInfo/COFF/global-type-hashes.ll b/llvm/test/DebugInfo/COFF/global-type-hashes.ll
index 70f9df156a5b..8ba56b5d150d 100644
--- a/llvm/test/DebugInfo/COFF/global-type-hashes.ll
+++ b/llvm/test/DebugInfo/COFF/global-type-hashes.ll
@@ -227,7 +227,7 @@ attributes #2 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-ma
 ; YAML:               Attrs:           3
 ; YAML:               Type:            116
 ; YAML:               FieldOffset:     4
-; YAML:               Name:            Y
+; YAML:               Name:            'Y'
 ; YAML:           - Kind:            LF_ONEMETHOD
 ; YAML:             OneMethod:
 ; YAML:               Type:            4103
diff --git a/llvm/test/ObjectYAML/MachO/DWARF-BigEndian.yaml b/llvm/test/ObjectYAML/MachO/DWARF-BigEndian.yaml
index 886597a0129b..092e5c1e9c57 100644
--- a/llvm/test/ObjectYAML/MachO/DWARF-BigEndian.yaml
+++ b/llvm/test/ObjectYAML/MachO/DWARF-BigEndian.yaml
@@ -272,7 +272,7 @@ DWARF:
     - long long int
     - __absvdi2
     - a
-    - N
+    - 'N'
     - t
   debug_abbrev:
     - Table:
@@ -384,7 +384,7 @@ DWARF:
 #CHECK:     - long long int
 #CHECK:     - __absvdi2
 #CHECK:     - a
-#CHECK:     - N
+#CHECK:     - 'N'
 #CHECK:     - t
 #CHECK:   debug_abbrev:    
 #CHECK:     - Code:            0x1
diff --git a/llvm/test/ObjectYAML/MachO/DWARF-LittleEndian.yaml b/llvm/test/ObjectYAML/MachO/DWARF-LittleEndian.yaml
index 12bf9e70c982..04be63178a24 100644
--- a/llvm/test/ObjectYAML/MachO/DWARF-LittleEndian.yaml
+++ b/llvm/test/ObjectYAML/MachO/DWARF-LittleEndian.yaml
@@ -261,7 +261,7 @@ DWARF:
     - long long int
     - __absvdi2
     - a
-    - N
+    - 'N'
     - t
   debug_abbrev:
     - Table:
@@ -373,7 +373,7 @@ DWARF:
 #CHECK:     - long long int
 #CHECK:     - __absvdi2
 #CHECK:     - a
-#CHECK:     - N
+#CHECK:     - 'N'
 #CHECK:     - t
 #CHECK:   debug_abbrev:    
 #CHECK:     - Code:            0x1
diff --git a/llvm/test/YAMLParser/bool.test b/llvm/test/YAMLParser/bool.test
index e987a0ec1e32..885c78c6660f 100644
--- a/llvm/test/YAMLParser/bool.test
+++ b/llvm/test/YAMLParser/bool.test
@@ -1,6 +1,24 @@
 # RUN: yaml-bench -canonical %s
 
-- yes
-- NO
+- true
 - True
+- TRUE
+- yes
+- Yes
+- YES
 - on
+- On
+- ON
+- false
+- False
+- FALSE
+- no
+- No
+- NO
+- off
+- Off
+- OFF
+- y
+- Y
+- n
+- N
diff --git a/llvm/test/tools/llvm-size/common.test b/llvm/test/tools/llvm-size/common.test
index ac804778ed76..becc183875b5 100644
--- a/llvm/test/tools/llvm-size/common.test
+++ b/llvm/test/tools/llvm-size/common.test
@@ -34,7 +34,7 @@ Sections:
   - Name: .text
     Type: SHT_PROGBITS
 Symbols:
-  - Name:  y
+  - Name:  'y'
     Type:  STT_OBJECT
     Size:  4
     Index: SHN_COMMON

From ab6de2f3db030452e601f108f4d2882826d9566c Mon Sep 17 00:00:00 2001
From: Alessandro Di Federico <ale@rev.ng>
Date: Thu, 7 Apr 2022 12:40:35 +0200
Subject: [PATCH 244/244] YAMLParser: allow : and - at plain scalar start

This commit introduces support for having `:` and `-` at the beginning of
a YAML scalar. Right now, YAMLParser disallows them if we are in a flow
sequence (`[ ... ]`), but the YAML specification does not seem to hint
at this limitation.

This enables us to correctly parse YAML that has gone through other YAML
libraries and pyyaml in particular.
---
 llvm/lib/Support/YAMLParser.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index f68ba0d065c1..2f668c32fe39 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -1826,13 +1826,12 @@ bool Scanner::fetchMoreTokens() {
   StringRef FirstChar(Current, 1);
   if (!(isBlankOrBreak(Current)
         || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
-      || (*Current == '-' && !isBlankOrBreak(Current + 1))
-      || (!FlowLevel && (*Current == '?' || *Current == ':')
-          && isBlankOrBreak(Current + 1))
-      || (!FlowLevel && *Current == ':'
-                      && Current + 2 < End
-                      && *(Current + 1) == ':'
-                      && !isBlankOrBreak(Current + 2)))
+      || (FirstChar.find_first_of("?:-") != StringRef::npos
+          && !isBlankOrBreak(Current + 1))
+      || (*Current == ':'
+          && Current + 2 < End
+          && *(Current + 1) == ':'
+          && !isBlankOrBreak(Current + 2)))
     return scanPlainScalar();
 
   setError("Unrecognized character while tokenizing.", Current);